diff options
92 files changed, 117831 insertions, 41 deletions
diff --git a/acinclude.m4 b/acinclude.m4 index 39b67287b4..4470342761 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -584,7 +584,7 @@ PHP_REAL_ARG_BUNDLE([$1],[$2],[$3],[$4],PHP_[]translit($1,a-z-,A-Z_),[ifelse($5, AC_DEFUN([PHP_REAL_ARG_BUNDLE],[ ifelse([$2],,,[AC_MSG_CHECKING([$2])]) -AC_ARG_ENABLE($1,[$3],$5=[$]enableval, +AC_ARG_WITH($1,[$3],$5=[$]withval, [ $5=ifelse($4,,no,$4) diff --git a/bundle/expat/config.m4 b/bundle/expat/config.m4 index 9852dfc911..89e12be2c6 100644 --- a/bundle/expat/config.m4 +++ b/bundle/expat/config.m4 @@ -1,5 +1,13 @@ -PHP_ARG_BUNDLE(bundle-expat, Whether to enable XML support, -[ --disable-bundle-expat Use the bundled expat library], yes) +dnl +dnl $Id$ +dnl + +PHP_ARG_BUNDLE(bundle-expat, Whether to bundle the expat library, +[ --without-bundle-expat Disable the bundled expat library], yes) + +# BC ini option +PHP_ARG_WITH(expat-dir, external libexpat install dir, +[ --with-expat-dir=DIR XML: external libexpat install dir], no, no) if test "$PHP_BUNDLE_EXPAT" = "yes" && test "$PHP_EXPAT_DIR" = "no"; then AC_C_BIGENDIAN @@ -10,8 +18,26 @@ if test "$PHP_BUNDLE_EXPAT" = "yes" && test "$PHP_EXPAT_DIR" = "no"; then fi AC_DEFINE(HAVE_LIBEXPAT_BUNDLED, 1, [ ]) + AC_DEFINE(HAVE_LIBEXPAT, 1, [ ]) AC_DEFINE(BYTEORDER, $order, [ ]) PHP_ADD_SOURCES(bundle/expat, xmlparse.c xmlrole.c xmltok.c) PHP_ADD_INCLUDE(bundle/expat) PHP_ADD_BUILD_DIR(bundle/expat) +elif test "$PHP_BUNDLE_EXPAT" != "no" && test "$PHP_EXPAT_DIR" != "no"; then + # Find external expat library + + for i in $PHP_BUNDLE_EXPAT $PHP_EXPAT_DIR; do + if test -f $i/lib/libexpat.a -o -f $i/lib/libexpat.$SHLIB_SUFFIX_NAME; then + EXPAT_DIR=$i + fi + done + + if test -z "$EXPAT_DIR"; then + AC_MSG_ERROR(not found. Please reinstall the expat distribution.) 
+ fi + + AC_DEFINE(HAVE_LIBEXPAT, 1, [ ]) + PHP_ADD_INCLUDE($EXPAT_DIR/include) + PHP_ADD_LIBRARY_WITH_PATH(expat, $EXPAT_DIR/lib, EXPAT_SHARED_LIBADD) + PHP_SUBST(EXPAT_SHARED_LIBADD) fi diff --git a/bundle/libxml/DOCBparser.c b/bundle/libxml/DOCBparser.c new file mode 100644 index 0000000000..f84a0be450 --- /dev/null +++ b/bundle/libxml/DOCBparser.c @@ -0,0 +1,6107 @@ +/* + * DOCBparser.c : an attempt to parse SGML Docbook documents + * + * This is extremely hackish. It also adds one extension + * <?sgml-declaration encoding="ISO-8859-1"?> + * allowing to store the encoding of the document within the instance. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_DOCB_ENABLED + +#include <string.h> +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/SAX.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlerror.h> +#include <libxml/DOCBparser.h> +#include <libxml/entities.h> +#include <libxml/encoding.h> +#include <libxml/valid.h> +#include <libxml/xmlIO.h> +#include <libxml/uri.h> +#include <libxml/globals.h> + +/* + * DocBook XML current versions + */ + +#define XML_DOCBOOK_XML_PUBLIC (const xmlChar *) \ + "-//OASIS//DTD DocBook XML V4.1.2//EN" +#define XML_DOCBOOK_XML_SYSTEM (const xmlChar *) \ + "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" + +/* + * Internal description of an SGML entity + */ +typedef struct _docbEntityDesc docbEntityDesc; +typedef docbEntityDesc *docbEntityDescPtr; +struct _docbEntityDesc { + int value; /* the UNICODE value for the character */ + const char *name; /* The entity name 
*/
+    const char *desc;   /* the description */
+};
+
+static int docbParseCharRef(docbParserCtxtPtr ctxt);
+static xmlEntityPtr docbParseEntityRef(docbParserCtxtPtr ctxt,
+                                       xmlChar **str);
+static void docbParseElement(docbParserCtxtPtr ctxt);
+static void docbParseContent(docbParserCtxtPtr ctxt);
+
+/*
+ * Internal description of an SGML element
+ */
+typedef struct _docbElemDesc docbElemDesc;
+typedef docbElemDesc *docbElemDescPtr;
+struct _docbElemDesc {
+    const char *name;   /* The tag name */
+    int startTag;       /* Whether the start tag can be implied */
+    int endTag;         /* Whether the end tag can be implied */
+    int empty;          /* Is this an empty element ? */
+    int depr;           /* Is this a deprecated element ? */
+    int dtd;            /* 1: only in Loose DTD, 2: only Frameset one */
+    const char *desc;   /* the description */
+};
+
+
+#define DOCB_MAX_NAMELEN 1000
+#define DOCB_PARSER_BIG_BUFFER_SIZE 1000
+#define DOCB_PARSER_BUFFER_SIZE 100
+
+/* #define DEBUG */
+/* #define DEBUG_PUSH */
+
+/************************************************************************
+ *                                                                      *
+ *              Parser stacks related functions and macros              *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * docbnamePush:
+ * @ctxt:  a DocBook SGML parser context
+ * @value:  the element name
+ *
+ * Pushes a new element name on top of the name stack and makes it the
+ * current name (ctxt->name).  The stack capacity is doubled when it is
+ * full; if that reallocation fails the existing stack and its recorded
+ * capacity are left untouched and nothing is pushed.
+ *
+ * Returns 0 in case of error, the index in the stack otherwise.
+ * Note that 0 is also the index returned for the very first push, so the
+ * return value alone cannot distinguish that case from a failure.
+ */
+static int
+docbnamePush(docbParserCtxtPtr ctxt, xmlChar * value)
+{
+    if (ctxt->nameNr >= ctxt->nameMax) {
+        xmlChar **grown;
+
+        /*
+         * Grow through a temporary so that a failed realloc neither
+         * loses the old table nor leaves nameMax out of sync with it.
+         */
+        grown = (xmlChar * *)xmlRealloc(ctxt->nameTab,
+                                        ctxt->nameMax * 2 *
+                                        sizeof(ctxt->nameTab[0]));
+        if (grown == NULL) {
+            xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
+            return (0);
+        }
+        ctxt->nameTab = grown;
+        ctxt->nameMax *= 2;
+    }
+    ctxt->nameTab[ctxt->nameNr] = value;
+    ctxt->name = value;
+    return (ctxt->nameNr++);
+}
+/**
+ * docbnamePop:
+ * @ctxt: a DocBook SGML parser context
+ *
+ * Pops the top element name from the name stack and updates the current
+ * name (ctxt->name) to the new top, or to NULL when the stack becomes
+ * empty.  Popping an already empty stack is a no-op: the element count
+ * is never driven below zero, so a later push cannot index before the
+ * start of the table.
+ *
+ * Returns the name just removed, or NULL if the stack was empty
+ */
+static xmlChar *
+docbnamePop(docbParserCtxtPtr ctxt)
+{
+    xmlChar *ret;
+
+    if (ctxt->nameNr <= 0)
+        return (NULL);
+    ctxt->nameNr--;
+    if (ctxt->nameNr > 0)
+        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
+    else
+        ctxt->name = NULL;
+    ret = ctxt->nameTab[ctxt->nameNr];
+    ctxt->nameTab[ctxt->nameNr] = 0;
+    return (ret);
+}
+
+/*
+ * Macros for accessing the content. Those should be used only by the parser,
+ * and not exported.
+ *
+ * Dirty macros, i.e. one need to make assumption on the context to use them
+ *
+ *   CUR_PTR return the current pointer to the xmlChar to be parsed.
+ *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
+ *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
+ *           in UNICODE mode. This should be used internally by the parser
+ *           only to compare to ASCII values otherwise it would break when
+ *           running with UTF-8 encoding.
+ *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
+ *           to compare on ASCII based substring.
+ *   UPP(n)  returns the n'th next xmlChar converted to uppercase. Same as CUR
+ *           it should be used only to compare on ASCII based substring.
+ *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
+ *           strings within the parser.
+ *
+ * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
+ *
+ *   CURRENT Returns the current char value, with the full decoding of
+ *           UTF-8 if we are using this mode. It returns an int.
+ *   NEXT    Skip to the next character, this does the proper decoding
+ *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
+ *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
+ */
+
+#define UPPER (toupper(*ctxt->input->cur))
+
+#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
+
+#define NXT(val) ctxt->input->cur[(val)]
+
+#define UPP(val) (toupper(ctxt->input->cur[(val)]))
+
+#define CUR_PTR ctxt->input->cur
+
+#define SHRINK xmlParserInputShrink(ctxt->input)
+
+#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
+
+#define CURRENT ((int) (*ctxt->input->cur))
+
+#define SKIP_BLANKS docbSkipBlankChars(ctxt)
+
+/* Imported from XML */
+
+/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
+#define CUR ((int) (*ctxt->input->cur))
+#define NEXT xmlNextChar(ctxt),ctxt->nbChars++
+
+#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
+#define NXT(val) ctxt->input->cur[(val)]
+#define CUR_PTR ctxt->input->cur
+
+
+#define NEXTL(l) do {                                                  \
+    if (*(ctxt->input->cur) == '\n') {                                 \
+        ctxt->input->line++; ctxt->input->col = 1;                     \
+    } else ctxt->input->col++;                                         \
+    ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;           \
+  } while (0)
+
+/************
+    \
+    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);    \
+    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
+ ************/
+
+#define CUR_CHAR(l) docbCurrentChar(ctxt, &l)
+#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
+
+#define COPY_BUF(l,b,i,v)                                              \
+    if (l == 1) b[i++] = (xmlChar) v;                                  \
+    else i += xmlCopyChar(l,&b[i],v)
+
+/**
+ * docbCurrentChar:
+ * @ctxt:  the DocBook SGML parser context
+ * @len:  pointer to the length of the char read
+ *
+ * The current char value, if using UTF-8 this may actually span multiple
+ * bytes in the input buffer. Implement the end of line normalization:
+ * 2.11 End-of-Line Handling
+ * If the encoding is unspecified, in the case we find an ISO-Latin-1
+ * char, then the encoding converter is plugged in automatically.
+ *
+ * A pending ctxt->token is returned as-is with *len set to 0.  On a
+ * malformed UTF-8 sequence the error is reported through the SAX error
+ * callback, the context charset is switched to ISO-8859-1 and the
+ * current byte is returned with *len set to 1.
+ *
+ * Returns the current char value and its length
+ */
+
+static int
+docbCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
+    if (ctxt->instate == XML_PARSER_EOF)
+        return(0);
+
+    if (ctxt->token != 0) {
+        *len = 0;
+        return(ctxt->token);
+    }
+    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
+        /*
+         * We are supposed to handle UTF8, check it's valid
+         * From rfc2044: encoding of the Unicode values on UTF-8:
+         *
+         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+         * 0000 0000-0000 007F   0xxxxxxx
+         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+         *
+         * Check for the 0x110000 limit too
+         */
+        const unsigned char *cur = ctxt->input->cur;
+        unsigned char c;
+        unsigned int val;
+
+        c = *cur;
+        if (c & 0x80) {
+            /*
+             * Growing the input may move the underlying buffer, so the
+             * local cursor is re-read from the input after every grow
+             * instead of being dereferenced stale.
+             */
+            if (cur[1] == 0) {
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                cur = ctxt->input->cur;
+            }
+            if ((cur[1] & 0xc0) != 0x80)
+                goto encoding_error;
+            if ((c & 0xe0) == 0xe0) {
+
+                if (cur[2] == 0) {
+                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                    cur = ctxt->input->cur;
+                }
+                if ((cur[2] & 0xc0) != 0x80)
+                    goto encoding_error;
+                if ((c & 0xf0) == 0xf0) {
+                    if (cur[3] == 0) {
+                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                        cur = ctxt->input->cur;
+                    }
+                    if (((c & 0xf8) != 0xf0) ||
+                        ((cur[3] & 0xc0) != 0x80))
+                        goto encoding_error;
+                    /* 4-byte code */
+                    *len = 4;
+                    val = (cur[0] & 0x7) << 18;
+                    val |= (cur[1] & 0x3f) << 12;
+                    val |= (cur[2] & 0x3f) << 6;
+                    val |= cur[3] & 0x3f;
+                } else {
+                    /* 3-byte code */
+                    *len = 3;
+                    val = (cur[0] & 0xf) << 12;
+                    val |= (cur[1] & 0x3f) << 6;
+                    val |= cur[2] & 0x3f;
+                }
+            } else {
+                /* 2-byte code */
+                *len = 2;
+                val = (cur[0] & 0x1f) << 6;
+                val |= cur[1] & 0x3f;
+            }
+            if (!IS_CHAR(val)) {
+                ctxt->errNo = XML_ERR_INVALID_ENCODING;
+                if ((ctxt->sax != NULL) &&
+                    (ctxt->sax->error != NULL))
+                    ctxt->sax->error(ctxt->userData,
+                                     "Char 0x%X out of allowed range\n", val);
+                ctxt->wellFormed = 0;
+                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
+            }
+            return(val);
+        } else {
+            /* 1-byte code */
+            *len = 1;
+            return((int) *ctxt->input->cur);
+        }
+    }
+    /*
+     * Assume it's a fixed length encoding (1) with
+     * a compatible encoding for the ASCII set, since
+     * XML constructs only use < 128 chars
+     */
+    *len = 1;
+    if ((int) *ctxt->input->cur < 0x80)
+        return((int) *ctxt->input->cur);
+
+    /*
+     * Humm this is bad, do an automatic flow conversion
+     */
+    xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    return(xmlCurrentChar(ctxt, len));
+
+encoding_error:
+    /*
+     * If we detect an UTF8 error that probably mean that the
+     * input encoding didn't get properly advertized in the
+     * declaration header. Report the error and switch the encoding
+     * to ISO-Latin-1 (if you don't like this policy, just declare the
+     * encoding !)
+     */
+    ctxt->errNo = XML_ERR_INVALID_ENCODING;
+    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
+        ctxt->sax->error(ctxt->userData,
+                         "Input is not proper UTF-8, indicate encoding !\n");
+        ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                         ctxt->input->cur[0], ctxt->input->cur[1],
+                         ctxt->input->cur[2], ctxt->input->cur[3]);
+    }
+
+    ctxt->charset = XML_CHAR_ENCODING_8859_1;
+    *len = 1;
+    return((int) *ctxt->input->cur);
+}
+
+#if 0
+/**
+ * sgmlNextChar:
+ * @ctxt:  the DocBook SGML parser context
+ *
+ * Skip to the next char input char.
+ */
+
+static void
+sgmlNextChar(docbParserCtxtPtr ctxt) {
+    if (ctxt->instate == XML_PARSER_EOF)
+        return;
+    if ((*ctxt->input->cur == 0) &&
+        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+        xmlPopInput(ctxt);
+    } else {
+        if (*(ctxt->input->cur) == '\n') {
+            ctxt->input->line++; ctxt->input->col = 1;
+        } else ctxt->input->col++;
+        ctxt->input->cur++;
+        ctxt->nbChars++;
+        if (*ctxt->input->cur == 0)
+            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+    }
+}
+#endif
+
+/**
+ * docbSkipBlankChars:
+ * @ctxt:  the DocBook SGML parser context
+ *
+ * skip all blanks character found at that point in the input streams.
+ *
+ * Returns the number of space chars skipped
+ */
+
+static int
+docbSkipBlankChars(xmlParserCtxtPtr ctxt) {
+    int res = 0; /* count of blank characters consumed so far */
+
+    while (IS_BLANK(*(ctxt->input->cur))) {
+        if ((*ctxt->input->cur == 0) && /* NOTE(review): only reachable if IS_BLANK accepts 0 - confirm */
+            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+            xmlPopInput(ctxt);
+        } else {
+            if (*(ctxt->input->cur) == '\n') { /* keep line/column bookkeeping in step */
+                ctxt->input->line++; ctxt->input->col = 1;
+            } else ctxt->input->col++;
+            ctxt->input->cur++;
+            ctxt->nbChars++;
+            if (*ctxt->input->cur == 0) /* hit a 0 byte: try to pull in more input */
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+        }
+        res++;
+    }
+    return(res);
+}
+
+
+
+/************************************************************************
+ *                                                                      *
+ *              The list of SGML elements and their properties          *
+ *                                                                      *
+ ************************************************************************/
+
+/*
+ * Start Tag: 1 means the start tag can be omitted
+ * End Tag:   1 means the end tag can be omitted
+ *            2 means it's forbidden (empty elements)
+ * Depr:      this element is deprecated
+ * DTD:       1 means that this element is valid only in the Loose DTD
+ *            2 means that this element is valid only in the Frameset DTD
+ *
+ * Name,Start Tag,End Tag,  Empty,  Depr.,    DTD, Description
+ *
+ * NOTE(review): the rows below store values from 0 to 9 in the Depr.
+ * column, which does not look like a deprecation flag; together with the
+ * trailing comments it is presumably a content-model class - confirm
+ * against the code that reads the depr field.
+ */
+static docbElemDesc
+docbookElementTable[] = {
+{ "abbrev",     0,      0,      0,      3,      0,      "" }, /* word */
+{ "abstract",   0,      0,      0,      9,      0,      "" }, /* title */
+{ "accel",      0,      0,      0,      7,      0,      "" }, /* smallcptr */
+{ "ackno",      0,      0,      0,      4,      0,      "" }, /* docinfo */
+{ "acronym",    0,      0,      0,      3,      0,      "" }, /* word */
+{ "action",     0,      0,      0,      7,      0,      "" }, /* smallcptr */
+{ "address",    0,      0,      0,      1,      0,      "" },
+{ "affiliation",0,      0,      0,      9,      0,      "" }, /* shortaffil */
+{ "alt",        0,      0,      0,      1,      0,      "" },
+{ "anchor",     0,      2,      1,      0,      0,      "" },
+{ "answer",     0,      0,      0,      9,      0,      "" }, /* label */
+{ "appendix",   0,      0,      0,      9,      0,      "" }, /* appendixinfo */
+{ "appendixinfo",0,     0,      0,      9,      0,      "" }, /* graphic */
+{ "application",0,      0,      0,      2,      0,      "" }, /* para */
+{ "area",       0,      2,      1,      0,      0,      "" },
+{ "areaset",    0,      0,      0,      9,      0,      "" }, /* area */
+{ "areaspec",   0,      0,      0,      9,      0,      "" }, /* area */
+{ "arg",        0,      0,
0, 1, 0, "" }, +{ "artheader", 0, 0, 0, 9, 0, "" }, +{ "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */ +{ "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "attribution",0, 0, 0, 2, 0, "" }, /* para */ +{ "audiodata", 0, 2, 1, 0, 0, "" }, +{ "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ +{ "authorblurb",0, 0, 0, 9, 0, "" }, /* title */ +{ "authorgroup",0, 0, 0, 9, 0, "" }, /* author */ +{ "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */ +{ "beginpage", 0, 2, 1, 0, 0, "" }, +{ "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ +{ "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */ +{ "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */ +{ "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */ +{ "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */ +{ "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; | bibliomset) */ +{ "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */ +{ "blockquote", 0, 0, 0, 9, 0, "" }, /* title */ +{ "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */ +{ "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */ +{ "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */ +{ "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */ +{ "caution", 0, 0, 0, 9, 0, "" }, /* title */ +{ "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */ +{ "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "citation", 0, 0, 0, 2, 0, "" }, /* para */ +{ "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */ +{ "citetitle", 0, 0, 0, 2, 0, "" }, /* para */ +{ "city", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */ +{ "classsynopsis",0, 0, 0, 9, 0, "" }, 
/* ooclass */ +{ "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */ +{ "co", 0, 2, 1, 0, 0, "" }, +{ "collab", 0, 0, 0, 9, 0, "" }, /* collabname */ +{ "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ +{ "colspec", 0, 2, 1, 0, 0, "" }, +{ "colspec", 0, 2, 1, 0, 0, "" }, +{ "command", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */ +{ "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */ +{ "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */ +{ "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "copyright", 0, 0, 0, 9, 0, "" }, /* year */ +{ "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "country", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "date", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ +{ "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */ +{ "docinfo", 0, 0, 0, 9, 0, "" }, +{ "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */ +{ "email", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "emphasis", 0, 0, 0, 2, 0, "" }, /* para */ +{ "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */ +{ "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */ +{ "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */ +{ "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ 
"errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */ +{ "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "firstterm", 0, 0, 0, 3, 0, "" }, /* word */ +{ "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */ +{ "footnoteref",0, 2, 1, 0, 0, "" }, +{ "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */ +{ "formalpara", 0, 0, 0, 9, 0, "" }, /* title */ +{ "funcdef", 0, 0, 0, 1, 0, "" }, +{ "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */ +{ "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */ +{ "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "function", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */ +{ "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */ +{ "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ +{ "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */ +{ "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */ +{ "glossseealso",0, 0, 1, 2, 0, "" }, /* para */ +{ "glosssee", 0, 0, 1, 2, 0, "" }, /* para */ +{ "glossterm", 0, 0, 0, 2, 0, "" }, /* para */ +{ "graphic", 0, 0, 0, 9, 0, "" }, +{ "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */ +{ "group", 0, 0, 0, 9, 0, "" }, /* arg */ +{ "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "highlights", 0, 0, 0, 9, 0, "" 
}, /* highlights.mix */ +{ "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "imagedata", 0, 2, 1, 0, 0, "" }, +{ "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */ +{ "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ +{ "important", 0, 0, 0, 9, 0, "" }, /* title */ +{ "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ +{ "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */ +{ "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */ +{ "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */ +{ "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */ +{ "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */ +{ "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */ +{ "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */ +{ "inlinegraphic",0, 0, 0, 9, 0, "" }, +{ "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */ +{ "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */ +{ "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */ +{ "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "keyword", 0, 0, 0, 1, 0, "" }, +{ "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */ +{ "label", 0, 0, 0, 3, 0, "" }, /* word */ +{ "legalnotice",0, 0, 0, 9, 0, "" }, /* title */ +{ "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "lineannotation",0, 0, 0, 2, 0, "" }, /* 
para */ +{ "link", 0, 0, 0, 2, 0, "" }, /* para */ +{ "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */ +{ "literal", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "literallayout",0, 0, 0, 2, 0, "" }, /* para */ +{ "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */ +{ "lotentry", 0, 0, 0, 2, 0, "" }, /* para */ +{ "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */ +{ "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */ +{ "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ +{ "member", 0, 0, 0, 2, 0, "" }, /* para */ +{ "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */ +{ "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */ +{ "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */ +{ "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "msgaud", 0, 0, 0, 2, 0, "" }, /* para */ +{ "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */ +{ "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */ +{ "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */ +{ "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "msgmain", 0, 0, 0, 9, 0, "" }, /* title */ +{ "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "msgrel", 0, 0, 0, 9, 0, "" }, /* title */ +{ "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "msgsub", 0, 0, 0, 9, 0, "" }, /* title */ +{ "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */ +{ "msg", 0, 0, 0, 9, 0, "" }, /* title */ +{ "note", 0, 0, 0, 9, 0, "" }, /* title */ +{ "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "olink", 0, 0, 0, 2, 0, "" }, /* para */ +{ "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */ +{ "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */ +{ "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */ +{ "optional", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ 
"orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */ +{ "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "paramdef", 0, 0, 0, 1, 0, "" }, +{ "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "para", 0, 0, 0, 2, 0, "" }, /* para */ +{ "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */ +{ "part", 0, 0, 0, 9, 0, "" }, /* partinfo */ +{ "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "phrase", 0, 0, 0, 2, 0, "" }, /* para */ +{ "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */ +{ "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ +{ "primary", 0, 0, 0, 9, 0, "" }, /* ndxterm */ +{ "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */ +{ "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "productname",0, 0, 0, 2, 0, "" }, /* para */ +{ "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */ +{ "programlisting",0, 0, 0, 2, 0, "" }, /* para */ +{ "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */ +{ "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */ +{ "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "question", 0, 0, 0, 9, 0, "" }, /* label */ +{ "quote", 0, 0, 0, 2, 0, "" }, /* para */ 
+{ "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */ +{ "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */ +{ "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */ +{ "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */ +{ "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */ +{ "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */ +{ "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */ +{ "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */ +{ "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */ +{ "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */ +{ "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */ +{ "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */ +{ "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */ +{ "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */ +{ "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "remark", 0, 0, 0, 2, 0, "" }, /* para */ +{ "replaceable",0, 0, 0, 1, 0, "" }, +{ "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */ +{ "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */ +{ "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */ +{ "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */ +{ "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */ +{ "sbr", 0, 2, 1, 0, 0, "" }, +{ "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */ +{ "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */ +{ "screen", 0, 0, 0, 2, 0, "" }, /* para */ +{ "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */ +{ "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */ +{ "secondary", 0, 0, 0, 4, 0, "" }, /* 
ndxterm */ +{ "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sect1", 0, 0, 0, 9, 0, "" }, /* sect */ +{ "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sect2", 0, 0, 0, 9, 0, "" }, /* sect */ +{ "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sect3", 0, 0, 0, 9, 0, "" }, /* sect */ +{ "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sect4", 0, 0, 0, 9, 0, "" }, /* sect */ +{ "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sect5", 0, 0, 0, 9, 0, "" }, /* sect */ +{ "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */ +{ "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ +{ "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */ +{ "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ +{ "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */ +{ "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */ +{ "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "seg", 0, 0, 0, 2, 0, "" }, /* para */ +{ "segtitle", 0, 0, 0, 8, 0, "" }, /* title */ +{ "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */ +{ "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */ +{ "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */ +{ "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */ +{ "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */ +{ "simpara", 0, 0, 0, 2, 0, "" }, /* para */ +{ "simplelist", 0, 0, 0, 9, 0, "" }, /* member */ +{ "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */ +{ "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ +{ "spanspec", 0, 2, 1, 0, 0, "" }, +{ "state", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "step", 0, 0, 0, 9, 0, "" }, /* title */ +{ "street", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "structname", 0, 0, 0, 7, 0, "" 
}, /* smallcptr */
+{ "subjectset", 0,      0,      0,      9,      0,      "" }, /* subject */
+{ "subject",    0,      0,      0,      9,      0,      "" }, /* subjectterm */
+{ "subjectterm",0,      0,      0,      1,      0,      "" },
+{ "subscript",  0,      0,      0,      1,      0,      "" },
+{ "substeps",   0,      0,      0,      9,      0,      "" }, /* step */
+{ "subtitle",   0,      0,      0,      8,      0,      "" }, /* title */
+{ "superscript", 0,     0,      0,      1,      0,      "" },
+{ "surname",    0,      0,      0,      4,      0,      "" }, /* docinfo */
+{ "symbol",     0,      0,      0,      7,      0,      "" }, /* smallcptr */
+{ "synopfragment", 0,   0,      0,      9,      0,      "" }, /* arg */
+{ "synopfragmentref", 0, 0,     0,      1,      0,      "" },
+{ "synopsis",   0,      0,      0,      2,      0,      "" }, /* para */
+{ "systemitem", 0,      0,      0,      7,      0,      "" }, /* smallcptr */
+{ "table",      0,      0,      0,      9,      0,      "" }, /* tbl.table.mdl */
+/* { "%tbl.table.name;", 0, 0,  0,      9,      0,      "" },*/ /* tbl.table.mdl */
+{ "tbody",      0,      0,      0,      9,      0,      "" }, /* row */
+{ "tbody",      0,      0,      0,      9,      0,      "" }, /* row */
+{ "term",       0,      0,      0,      2,      0,      "" }, /* para */
+{ "tertiaryie", 0,      0,      0,      4,      0,      "" }, /* ndxterm */
+{ "tertiary",   0,      0,      0,      4,      0,      "" }, /* ndxterm; name must have no padding, lookup is an exact match */
+{ "textobject", 0,      0,      0,      9,      0,      "" }, /* objectinfo */
+{ "tfoot",      0,      0,      0,      9,      0,      "" }, /* tbl.hdft.mdl */
+{ "tgroup",     0,      0,      0,      9,      0,      "" }, /* tbl.tgroup.mdl */
+{ "tgroup",     0,      0,      0,      9,      0,      "" }, /* tbl.tgroup.mdl */
+{ "thead",      0,      0,      0,      9,      0,      "" }, /* row */
+{ "thead",      0,      0,      0,      9,      0,      "" }, /* tbl.hdft.mdl */
+{ "tip",        0,      0,      0,      9,      0,      "" }, /* title */
+{ "titleabbrev",0,      0,      0,      8,      0,      "" }, /* title */
+{ "title",      0,      0,      0,      8,      0,      "" }, /* title */
+{ "tocback",    0,      0,      0,      2,      0,      "" }, /* para */
+{ "toc",        0,      0,      0,      9,      0,      "" }, /* bookcomponent.title.content */
+{ "tocchap",    0,      0,      0,      9,      0,      "" }, /* tocentry */
+{ "tocentry",   0,      0,      0,      2,      0,      "" }, /* para */
+{ "tocfront",   0,      0,      0,      2,      0,      "" }, /* para */
+{ "toclevel1",  0,      0,      0,      9,      0,      "" }, /* tocentry */
+{ "toclevel2",  0,      0,      0,      9,      0,      "" }, /* tocentry */
+{ "toclevel3",  0,      0,      0,      9,      0,      "" }, /* tocentry */
+{ "toclevel4",  0,      0,      0,      9,      0,      "" }, /* tocentry */
+{ "toclevel5",  0,      0,      0,      9,      0,      "" }, /* tocentry */
+{ "tocpart",    0,      0,      0,      9,      0,      "" }, /* tocentry */
+{
"token", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "trademark", 0, 0, 0, 1, 0, "" }, +{ "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "ulink", 0, 0, 0, 2, 0, "" }, /* para */ +{ "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */ +{ "varargs", 0, 2, 1, 0, 0, "" }, +{ "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ +{ "varlistentry",0, 0, 0, 9, 0, "" }, /* term */ +{ "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ +{ "videodata", 0, 2, 1, 0, 0, "" }, +{ "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ +{ "void", 0, 2, 1, 0, 0, "" }, +{ "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */ +{ "warning", 0, 0, 0, 9, 0, "" }, /* title */ +{ "wordasword", 0, 0, 0, 3, 0, "" }, /* word */ +{ "xref", 0, 2, 1, 0, 0, "" }, +{ "year", 0, 0, 0, 4, 0, "" }, /* docinfo */ +}; + +#if 0 +/* + * start tags that imply the end of a current element + * any tag of each line implies the end of the current element if the type of + * that element is in the same line + */ +static const char *docbEquEnd[] = { +"dt", "dd", "li", "option", NULL, +"h1", "h2", "h3", "h4", "h5", "h6", NULL, +"ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL, +NULL +}; +#endif + +/* + * according the SGML DTD, HR should be added to the 2nd line above, as it + * is not allowed within a H1, H2, H3, etc. But we should tolerate that case + * because many documents contain rules in headings... + */ + +/* + * start tags that imply the end of current element + */ +static const char *docbStartClose[] = { +NULL +}; + +static const char** docbStartCloseIndex[100]; +static int docbStartCloseIndexinitialized = 0; + +/************************************************************************ + * * + * functions to handle SGML specific data * + * * + ************************************************************************/ + +/** + * docbInitAutoClose: + * + * Initialize the docbStartCloseIndex for fast lookup of closing tags names. 
+ * + */ +static void +docbInitAutoClose(void) { + int indx, i = 0; + + if (docbStartCloseIndexinitialized) return; + + for (indx = 0;indx < 100;indx ++) docbStartCloseIndex[indx] = NULL; + indx = 0; + while ((docbStartClose[i] != NULL) && (indx < 100 - 1)) { + docbStartCloseIndex[indx++] = &docbStartClose[i]; + while (docbStartClose[i] != NULL) i++; + i++; + } +} + +/** + * docbTagLookup: + * @tag: The tag name + * + * Lookup the SGML tag in the ElementTable + * + * Returns the related docbElemDescPtr or NULL if not found. + */ +static docbElemDescPtr +docbTagLookup(const xmlChar *tag) { + unsigned int i; + + for (i = 0; i < (sizeof(docbookElementTable) / + sizeof(docbookElementTable[0]));i++) { + if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name)) + return(&docbookElementTable[i]); + } + return(NULL); +} + +/** + * docbCheckAutoClose: + * @newtag: The new tag name + * @oldtag: The old tag name + * + * Checks whether the new tag is one of the registered valid tags for + * closing old. + * Initialize the docbStartCloseIndex for fast lookup of closing tags names. + * + * Returns 0 if no, 1 if yes. + */ +static int +docbCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) { + int i, indx; + const char **closed = NULL; + + if (docbStartCloseIndexinitialized == 0) docbInitAutoClose(); + + /* inefficient, but not a big deal */ + for (indx = 0; indx < 100;indx++) { + closed = docbStartCloseIndex[indx]; + if (closed == NULL) return(0); + if (xmlStrEqual(BAD_CAST *closed, newtag)) break; + } + + i = closed - docbStartClose; + i++; + while (docbStartClose[i] != NULL) { + if (xmlStrEqual(BAD_CAST docbStartClose[i], oldtag)) { + return(1); + } + i++; + } + return(0); +} + +/** + * docbAutoCloseOnClose: + * @ctxt: an SGML parser context + * @newtag: The new tag name + * + * The DocBook DTD allows an ending tag to implicitly close other tags. 
+ */ +static void +docbAutoCloseOnClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) { + docbElemDescPtr info; + xmlChar *oldname; + int i; + + if ((newtag[0] == '/') && (newtag[1] == 0)) + return; + +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr); + for (i = 0;i < ctxt->nameNr;i++) + xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]); +#endif + + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (xmlStrEqual(newtag, ctxt->nameTab[i])) break; + } + if (i < 0) return; + + while (!xmlStrEqual(newtag, ctxt->name)) { + info = docbTagLookup(ctxt->name); + if ((info == NULL) || (info->endTag == 1)) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name); +#endif + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", + newtag, ctxt->name); + ctxt->wellFormed = 0; + } + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = docbnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } +} + +/** + * docbAutoClose: + * @ctxt: an SGML parser context + * @newtag: The new tag name or NULL + * + * The DocBook DTD allows a tag to implicitly close other tags. + * The list is kept in docbStartClose array. This function is + * called when a new tag has been detected and generates the + * appropriates closes if possible/needed. 
+ * If newtag is NULL this mean we are at the end of the resource + * and we should check + */ +static void +docbAutoClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) { + xmlChar *oldname; + while ((newtag != NULL) && (ctxt->name != NULL) && + (docbCheckAutoClose(newtag, ctxt->name))) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"docbAutoClose: %s closes %s\n", newtag, ctxt->name); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = docbnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"docbAutoClose: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } +} + +/** + * docbAutoCloseTag: + * @doc: the SGML document + * @name: The tag name + * @elem: the SGML element + * + * The DocBook DTD allows a tag to implicitly close other tags. + * The list is kept in docbStartClose array. This function checks + * if the element or one of it's children would autoclose the + * given tag. + * + * Returns 1 if autoclose, 0 otherwise + */ +static int +docbAutoCloseTag(docbDocPtr doc, const xmlChar *name, docbNodePtr elem) { + docbNodePtr child; + + if (elem == NULL) return(1); + if (xmlStrEqual(name, elem->name)) return(0); + if (docbCheckAutoClose(elem->name, name)) return(1); + child = elem->children; + while (child != NULL) { + if (docbAutoCloseTag(doc, name, child)) return(1); + child = child->next; + } + return(0); +} + +/************************************************************************ + * * + * The list of SGML predefined entities * + * * + ************************************************************************/ + + +static docbEntityDesc +docbookEntitiesTable[] = { +/* + * the 4 absolute ones, plus apostrophe. 
+ */ +{ 0x0026, "amp", "AMPERSAND" }, +{ 0x003C, "lt", "LESS-THAN SIGN" }, + +/* + * Converted with VI macros from docbook ent files + */ +{ 0x0021, "excl", "EXCLAMATION MARK" }, +{ 0x0022, "quot", "QUOTATION MARK" }, +{ 0x0023, "num", "NUMBER SIGN" }, +{ 0x0024, "dollar", "DOLLAR SIGN" }, +{ 0x0025, "percnt", "PERCENT SIGN" }, +{ 0x0027, "apos", "APOSTROPHE" }, +{ 0x0028, "lpar", "LEFT PARENTHESIS" }, +{ 0x0029, "rpar", "RIGHT PARENTHESIS" }, +{ 0x002A, "ast", "ASTERISK OPERATOR" }, +{ 0x002B, "plus", "PLUS SIGN" }, +{ 0x002C, "comma", "COMMA" }, +{ 0x002D, "hyphen", "HYPHEN-MINUS" }, +{ 0x002E, "period", "FULL STOP" }, +{ 0x002F, "sol", "SOLIDUS" }, +{ 0x003A, "colon", "COLON" }, +{ 0x003B, "semi", "SEMICOLON" }, +{ 0x003D, "equals", "EQUALS SIGN" }, +{ 0x003E, "gt", "GREATER-THAN SIGN" }, +{ 0x003F, "quest", "QUESTION MARK" }, +{ 0x0040, "commat", "COMMERCIAL AT" }, +{ 0x005B, "lsqb", "LEFT SQUARE BRACKET" }, +{ 0x005C, "bsol", "REVERSE SOLIDUS" }, +{ 0x005D, "rsqb", "RIGHT SQUARE BRACKET" }, +{ 0x005E, "circ", "RING OPERATOR" }, +{ 0x005F, "lowbar", "LOW LINE" }, +{ 0x0060, "grave", "GRAVE ACCENT" }, +{ 0x007B, "lcub", "LEFT CURLY BRACKET" }, +{ 0x007C, "verbar", "VERTICAL LINE" }, +{ 0x007D, "rcub", "RIGHT CURLY BRACKET" }, +{ 0x00A0, "nbsp", "NO-BREAK SPACE" }, +{ 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" }, +{ 0x00A2, "cent", "CENT SIGN" }, +{ 0x00A3, "pound", "POUND SIGN" }, +{ 0x00A4, "curren", "CURRENCY SIGN" }, +{ 0x00A5, "yen", "YEN SIGN" }, +{ 0x00A6, "brvbar", "BROKEN BAR" }, +{ 0x00A7, "sect", "SECTION SIGN" }, +{ 0x00A8, "die", "" }, +{ 0x00A8, "Dot", "" }, +{ 0x00A8, "uml", "" }, +{ 0x00A9, "copy", "COPYRIGHT SIGN" }, +{ 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" }, +{ 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" }, +{ 0x00AC, "not", "NOT SIGN" }, +{ 0x00AD, "shy", "SOFT HYPHEN" }, +{ 0x00AE, "reg", "REG TRADE MARK SIGN" }, +{ 0x00AF, "macr", "MACRON" }, +{ 0x00B0, "deg", "DEGREE SIGN" }, +{ 0x00B1, "plusmn", "PLUS-MINUS SIGN" 
}, +{ 0x00B2, "sup2", "SUPERSCRIPT TWO" }, +{ 0x00B3, "sup3", "SUPERSCRIPT THREE" }, +{ 0x00B4, "acute", "ACUTE ACCENT" }, +{ 0x00B5, "micro", "MICRO SIGN" }, +{ 0x00B6, "para", "PILCROW SIGN" }, +{ 0x00B7, "middot", "MIDDLE DOT" }, +{ 0x00B8, "cedil", "CEDILLA" }, +{ 0x00B9, "sup1", "SUPERSCRIPT ONE" }, +{ 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" }, +{ 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" }, +{ 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" }, +{ 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" }, +{ 0x00BD, "half", "VULGAR FRACTION ONE HALF" }, +{ 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" }, +{ 0x00BF, "iquest", "INVERTED QUESTION MARK" }, +{ 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" }, +{ 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" }, +{ 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" }, +{ 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" }, +{ 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" }, +{ 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" }, +{ 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" }, +{ 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" }, +{ 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" }, +{ 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" }, +{ 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" }, +{ 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" }, +{ 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" }, +{ 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" }, +{ 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" }, +{ 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" }, +{ 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" }, +{ 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" }, +{ 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" }, +{ 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" }, +{ 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" }, +{ 0x00D5, "Otilde", "LATIN CAPITAL LETTER O 
WITH TILDE" }, +{ 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" }, +{ 0x00D7, "times", "MULTIPLICATION SIGN" }, +{ 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" }, +{ 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" }, +{ 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" }, +{ 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" }, +{ 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" }, +{ 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" }, +{ 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" }, +{ 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" }, +{ 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" }, +{ 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" }, +{ 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" }, +{ 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" }, +{ 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" }, +{ 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" }, +{ 0x00E6, "aelig", "LATIN SMALL LETTER AE" }, +{ 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" }, +{ 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" }, +{ 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" }, +{ 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" }, +{ 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" }, +{ 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" }, +{ 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" }, +{ 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" }, +{ 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" }, +{ 0x00F0, "eth", "LATIN SMALL LETTER ETH" }, +{ 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" }, +{ 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" }, +{ 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" }, +{ 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" }, +{ 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" }, +{ 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" }, +{ 0x00F7, "divide", "DIVISION SIGN" }, +{ 0x00F8, "oslash", 
"CIRCLED DIVISION SLASH" }, +{ 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" }, +{ 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" }, +{ 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" }, +{ 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" }, +{ 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" }, +{ 0x00FE, "thorn", "LATIN SMALL LETTER THORN" }, +{ 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" }, +{ 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" }, +{ 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" }, +{ 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" }, +{ 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" }, +{ 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" }, +{ 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" }, +{ 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" }, +{ 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" }, +{ 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" }, +{ 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" }, +{ 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" }, +{ 0x010B, "cdot", "DOT OPERATOR" }, +{ 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" }, +{ 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" }, +{ 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" }, +{ 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" }, +{ 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" }, +{ 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" }, +{ 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" }, +{ 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" }, +{ 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" }, +{ 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" }, +{ 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" }, +{ 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" }, +{ 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" }, +{ 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" }, +{ 0x011C, "Gcirc", "LATIN CAPITAL 
LETTER G WITH CIRCUMFLEX" }, +{ 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" }, +{ 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" }, +{ 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" }, +{ 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" }, +{ 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" }, +{ 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" }, +{ 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" }, +{ 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" }, +{ 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" }, +{ 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" }, +{ 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" }, +{ 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" }, +{ 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" }, +{ 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" }, +{ 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" }, +{ 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" }, +{ 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" }, +{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" }, +{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" }, +{ 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" }, +{ 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" }, +{ 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" }, +{ 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" }, +{ 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" }, +{ 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" }, +{ 0x0138, "kgreen", "LATIN SMALL LETTER KRA" }, +{ 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" }, +{ 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" }, +{ 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" }, +{ 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" }, +{ 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" }, +{ 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" }, +{ 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" }, +{ 0x0140, 
"lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" }, +{ 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" }, +{ 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" }, +{ 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" }, +{ 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" }, +{ 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" }, +{ 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" }, +{ 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" }, +{ 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" }, +{ 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" }, +{ 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" }, +{ 0x014B, "eng", "LATIN SMALL LETTER ENG" }, +{ 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" }, +{ 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" }, +{ 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" }, +{ 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" }, +{ 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" }, +{ 0x0153, "oelig", "LATIN SMALL LIGATURE OE" }, +{ 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" }, +{ 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" }, +{ 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" }, +{ 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" }, +{ 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" }, +{ 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" }, +{ 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" }, +{ 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" }, +{ 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" }, +{ 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" }, +{ 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" }, +{ 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" }, +{ 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" }, +{ 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" }, +{ 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" }, +{ 0x0163, "tcedil", "LATIN SMALL LETTER T WITH 
CEDILLA" }, +{ 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" }, +{ 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" }, +{ 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" }, +{ 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" }, +{ 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" }, +{ 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" }, +{ 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" }, +{ 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" }, +{ 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" }, +{ 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" }, +{ 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" }, +{ 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" }, +{ 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" }, +{ 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" }, +{ 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" }, +{ 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" }, +{ 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" }, +{ 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" }, +{ 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" }, +{ 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" }, +{ 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" }, +{ 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" }, +{ 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" }, +{ 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" }, +{ 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" }, +{ 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" }, +{ 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" }, +{ 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" }, +{ 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" }, +{ 0x02C7, "caron", "CARON" }, +{ 0x02D8, "breve", "BREVE" }, +{ 0x02D9, "dot", "DOT ABOVE" }, +{ 0x02DA, "ring", "RING ABOVE" }, +{ 0x02DB, "ogon", "OGONEK" }, +{ 0x02DC, "tilde", "TILDE" }, +{ 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" 
}, +{ 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" }, +{ 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" }, +{ 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" }, +{ 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" }, +{ 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" }, +{ 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" }, +{ 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" }, +{ 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" }, +{ 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" }, +{ 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" }, +{ 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" }, +{ 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" }, +{ 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" }, +{ 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" }, +{ 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" }, +{ 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" }, +{ 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" }, +{ 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" }, +{ 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" }, +{ 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" }, +{ 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" }, +{ 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" }, +{ 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" }, +{ 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" }, +{ 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" }, +{ 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" }, +{ 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" }, +{ 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" }, +{ 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" }, +{ 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" }, +{ 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" }, +{ 0x039E, "Xi", "GREEK CAPITAL LETTER XI" }, +{ 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" }, +{ 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" }, +{ 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" }, +{ 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" }, +{ 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" }, +{ 0x03A3, "b.Sigma", 
"GREEK CAPITAL LETTER SIGMA" }, +{ 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" }, +{ 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" }, +{ 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" }, +{ 0x03A5, "Ugr", "" }, +{ 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" }, +{ 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" }, +{ 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" }, +{ 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" }, +{ 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" }, +{ 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" }, +{ 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" }, +{ 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" }, +{ 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" }, +{ 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" }, +{ 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" }, +{ 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" }, +{ 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" }, +{ 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" }, +{ 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" }, +{ 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" }, +{ 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" }, +{ 0x03B1, "agr", "" }, +{ 0x03B1, "alpha", "" }, +{ 0x03B1, "b.alpha", "" }, +{ 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" }, +{ 0x03B2, "beta", "GREEK SMALL LETTER BETA" }, +{ 0x03B2, "bgr", "GREEK SMALL LETTER BETA" }, +{ 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" }, +{ 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" }, +{ 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" }, +{ 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" }, +{ 0x03B4, "delta", "GREEK SMALL LETTER DELTA" }, +{ 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" }, +{ 0x03B5, "b.epsi", "" }, +{ 0x03B5, "b.epsis", "" }, +{ 0x03B5, "b.epsiv", "" }, +{ 0x03B5, "egr", "" }, +{ 0x03B5, "epsiv", "" }, +{ 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" }, +{ 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" }, +{ 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" }, +{ 0x03B7, "b.eta", "GREEK SMALL 
LETTER ETA" }, +{ 0x03B7, "eegr", "GREEK SMALL LETTER ETA" }, +{ 0x03B7, "eta", "GREEK SMALL LETTER ETA" }, +{ 0x03B8, "b.thetas", "" }, +{ 0x03B8, "thetas", "" }, +{ 0x03B8, "thgr", "" }, +{ 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" }, +{ 0x03B9, "igr", "GREEK SMALL LETTER IOTA" }, +{ 0x03B9, "iota", "GREEK SMALL LETTER IOTA" }, +{ 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" }, +{ 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" }, +{ 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" }, +{ 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" }, +{ 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" }, +{ 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" }, +{ 0x03BC, "b.mu", "GREEK SMALL LETTER MU" }, +{ 0x03BC, "mgr", "GREEK SMALL LETTER MU" }, +{ 0x03BC, "mu", "GREEK SMALL LETTER MU" }, +{ 0x03BD, "b.nu", "GREEK SMALL LETTER NU" }, +{ 0x03BD, "ngr", "GREEK SMALL LETTER NU" }, +{ 0x03BD, "nu", "GREEK SMALL LETTER NU" }, +{ 0x03BE, "b.xi", "GREEK SMALL LETTER XI" }, +{ 0x03BE, "xgr", "GREEK SMALL LETTER XI" }, +{ 0x03BE, "xi", "GREEK SMALL LETTER XI" }, +{ 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" }, +{ 0x03C0, "b.pi", "GREEK SMALL LETTER PI" }, +{ 0x03C0, "pgr", "GREEK SMALL LETTER PI" }, +{ 0x03C0, "pi", "GREEK SMALL LETTER PI" }, +{ 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" }, +{ 0x03C1, "rgr", "GREEK SMALL LETTER RHO" }, +{ 0x03C1, "rho", "GREEK SMALL LETTER RHO" }, +{ 0x03C2, "b.sigmav", "" }, +{ 0x03C2, "sfgr", "" }, +{ 0x03C2, "sigmav", "" }, +{ 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" }, +{ 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" }, +{ 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" }, +{ 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" }, +{ 0x03C4, "tau", "GREEK SMALL LETTER TAU" }, +{ 0x03C4, "tgr", "GREEK SMALL LETTER TAU" }, +{ 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" }, +{ 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" }, +{ 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" }, +{ 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" }, +{ 0x03C6, "phgr", "GREEK SMALL LETTER PHI" }, +{ 0x03C6, 
"phis", "GREEK SMALL LETTER PHI" }, +{ 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" }, +{ 0x03C7, "chi", "GREEK SMALL LETTER CHI" }, +{ 0x03C7, "khgr", "GREEK SMALL LETTER CHI" }, +{ 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" }, +{ 0x03C8, "psgr", "GREEK SMALL LETTER PSI" }, +{ 0x03C8, "psi", "GREEK SMALL LETTER PSI" }, +{ 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" }, +{ 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" }, +{ 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" }, +{ 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" }, +{ 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" }, +{ 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" }, +{ 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" }, +{ 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" }, +{ 0x03D1, "b.thetav", "" }, +{ 0x03D1, "thetav", "" }, +{ 0x03D2, "b.Upsi", "" }, +{ 0x03D2, "Upsi", "" }, +{ 0x03D5, "b.phiv", "GREEK PHI SYMBOL" }, +{ 0x03D5, "phiv", "GREEK PHI SYMBOL" }, +{ 0x03D6, "b.piv", "GREEK PI SYMBOL" }, +{ 0x03D6, "piv", "GREEK PI SYMBOL" }, +{ 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" }, +{ 0x03DC, "gammad", "GREEK LETTER DIGAMMA" }, +{ 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" }, +{ 0x03F0, "kappav", "GREEK KAPPA SYMBOL" }, +{ 0x03F1, "b.rhov", "GREEK RHO SYMBOL" }, +{ 0x03F1, "rhov", "GREEK RHO SYMBOL" }, +{ 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" }, +{ 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" }, +{ 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" }, +{ 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" }, +{ 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" }, +{ 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" }, +{ 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" }, +{ 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" }, +{ 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" }, +{ 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" }, +{ 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" }, +{ 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" }, +{ 0x040E, 
"Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" }, +{ 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" }, +{ 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" }, +{ 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" }, +{ 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" }, +{ 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" }, +{ 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" }, +{ 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" }, +{ 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" }, +{ 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" }, +{ 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" }, +{ 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" }, +{ 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" }, +{ 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" }, +{ 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" }, +{ 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" }, +{ 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" }, +{ 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" }, +{ 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" }, +{ 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" }, +{ 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" }, +{ 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" }, +{ 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" }, +{ 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" }, +{ 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" }, +{ 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" }, +{ 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" }, +{ 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" }, +{ 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" }, +{ 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" }, +{ 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" }, +{ 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" }, +{ 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" }, +{ 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" }, +{ 0x0430, "acy", "CYRILLIC SMALL LETTER A" }, +{ 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" }, +{ 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" }, +{ 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" }, +{ 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" }, +{ 0x0435, "iecy", 
"CYRILLIC SMALL LETTER IE" }, +{ 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" }, +{ 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" }, +{ 0x0438, "icy", "CYRILLIC SMALL LETTER I" }, +{ 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" }, +{ 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" }, +{ 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" }, +{ 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" }, +{ 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" }, +{ 0x043E, "ocy", "CYRILLIC SMALL LETTER O" }, +{ 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" }, +{ 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" }, +{ 0x0441, "scy", "CYRILLIC SMALL LETTER ES" }, +{ 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" }, +{ 0x0443, "ucy", "CYRILLIC SMALL LETTER U" }, +{ 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" }, +{ 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" }, +{ 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" }, +{ 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" }, +{ 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" }, +{ 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" }, +{ 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" }, +{ 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" }, +{ 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" }, +{ 0x044D, "ecy", "CYRILLIC SMALL LETTER E" }, +{ 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" }, +{ 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" }, +{ 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" }, +{ 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" }, +{ 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" }, +{ 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" }, +{ 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" }, +{ 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" }, +{ 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" }, +{ 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" }, +{ 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" }, +{ 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" }, +{ 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" }, +{ 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" }, +{ 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT 
U" }, +{ 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" }, +{ 0x2002, "ensp", "EN SPACE" }, +{ 0x2003, "emsp", "EM SPACE" }, +{ 0x2004, "emsp13", "THREE-PER-EM SPACE" }, +{ 0x2005, "emsp14", "FOUR-PER-EM SPACE" }, +{ 0x2007, "numsp", "FIGURE SPACE" }, +{ 0x2008, "puncsp", "PUNCTUATION SPACE" }, +{ 0x2009, "thinsp", "THIN SPACE" }, +{ 0x200A, "hairsp", "HAIR SPACE" }, +{ 0x2010, "dash", "HYPHEN" }, +{ 0x2013, "ndash", "EN DASH" }, +{ 0x2014, "mdash", "EM DASH" }, +{ 0x2015, "horbar", "HORIZONTAL BAR" }, +{ 0x2016, "Verbar", "DOUBLE VERTICAL LINE" }, +{ 0x2018, "lsquo", "" }, +{ 0x2018, "rsquor", "" }, +{ 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" }, +{ 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" }, +{ 0x201C, "ldquo", "" }, +{ 0x201C, "rdquor", "" }, +{ 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" }, +{ 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" }, +{ 0x2020, "dagger", "DAGGER" }, +{ 0x2021, "Dagger", "DOUBLE DAGGER" }, +{ 0x2022, "bull", "BULLET" }, +{ 0x2025, "nldr", "TWO DOT LEADER" }, +{ 0x2026, "hellip", "HORIZONTAL ELLIPSIS" }, +{ 0x2026, "mldr", "HORIZONTAL ELLIPSIS" }, +{ 0x2030, "permil", "PER MILLE SIGN" }, +{ 0x2032, "prime", "PRIME" }, +{ 0x2032, "vprime", "PRIME" }, +{ 0x2033, "Prime", "DOUBLE PRIME" }, +{ 0x2034, "tprime", "TRIPLE PRIME" }, +{ 0x2035, "bprime", "REVERSED PRIME" }, +{ 0x2041, "caret", "CARET" }, +{ 0x2043, "hybull", "HYPHEN BULLET" }, +{ 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" }, +{ 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" }, +{ 0x2105, "incare", "CARE OF" }, +{ 0x210B, "hamilt", "SCRIPT CAPITAL H" }, +{ 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" }, +{ 0x2111, "image", "BLACK-LETTER CAPITAL I" }, +{ 0x2112, "lagran", "SCRIPT CAPITAL L" }, +{ 0x2113, "ell", "SCRIPT SMALL L" }, +{ 0x2116, "numero", "NUMERO SIGN" }, +{ 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" }, +{ 0x2118, "weierp", "SCRIPT CAPITAL P" }, +{ 0x211C, "real", "BLACK-LETTER CAPITAL R" }, +{ 0x211E, "rx", "PRESCRIPTION TAKE" }, +{ 0x2122, 
"trade", "TRADE MARK SIGN" }, +{ 0x2126, "ohm", "OHM SIGN" }, +{ 0x212B, "angst", "ANGSTROM SIGN" }, +{ 0x212C, "bernou", "SCRIPT CAPITAL B" }, +{ 0x2133, "phmmat", "SCRIPT CAPITAL M" }, +{ 0x2134, "order", "SCRIPT SMALL O" }, +{ 0x2135, "aleph", "ALEF SYMBOL" }, +{ 0x2136, "beth", "BET SYMBOL" }, +{ 0x2137, "gimel", "GIMEL SYMBOL" }, +{ 0x2138, "daleth", "DALET SYMBOL" }, +{ 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" }, +{ 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" }, +{ 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" }, +{ 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" }, +{ 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" }, +{ 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" }, +{ 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" }, +{ 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" }, +{ 0x215B, "frac18", "" }, +{ 0x215C, "frac38", "" }, +{ 0x215D, "frac58", "" }, +{ 0x215E, "frac78", "" }, +{ 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" }, +{ 0x2191, "uarr", "UPWARDS ARROW" }, +{ 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" }, +{ 0x2193, "darr", "DOWNWARDS ARROW" }, +{ 0x2194, "harr", "LEFT RIGHT ARROW" }, +{ 0x2194, "xhArr", "LEFT RIGHT ARROW" }, +{ 0x2194, "xharr", "LEFT RIGHT ARROW" }, +{ 0x2195, "varr", "UP DOWN ARROW" }, +{ 0x2196, "nwarr", "NORTH WEST ARROW" }, +{ 0x2197, "nearr", "NORTH EAST ARROW" }, +{ 0x2198, "drarr", "SOUTH EAST ARROW" }, +{ 0x2199, "dlarr", "SOUTH WEST ARROW" }, +{ 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" }, +{ 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" }, +{ 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" }, +{ 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" }, +{ 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" }, +{ 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" }, +{ 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" }, +{ 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" }, +{ 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" }, +{ 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" }, +{ 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" }, +{ 0x21AC, 
"rarrlp", "RIGHTWARDS ARROW WITH LOOP" }, +{ 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" }, +{ 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" }, +{ 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" }, +{ 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" }, +{ 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" }, +{ 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" }, +{ 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" }, +{ 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" }, +{ 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" }, +{ 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" }, +{ 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" }, +{ 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" }, +{ 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" }, +{ 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" }, +{ 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" }, +{ 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" }, +{ 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" }, +{ 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" }, +{ 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" }, +{ 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" }, +{ 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" }, +{ 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" }, +{ 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" }, +{ 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" }, +{ 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" }, +{ 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" }, +{ 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" }, +{ 0x21D0, "lArr", "LEFTWARDS ARROW" }, +{ 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" }, +{ 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" }, +{ 0x21D2, "rArr", "RIGHTWARDS ARROW" }, +{ 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" }, +{ 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" }, +{ 0x21D4, "hArr", "" }, +{ 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" }, +{ 0x21D5, "vArr", "UP DOWN 
DOUBLE ARROW" }, +{ 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" }, +{ 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" }, +{ 0x2200, "forall", "" }, +{ 0x2201, "comp", "COMPLEMENT" }, +{ 0x2202, "part", "" }, +{ 0x2203, "exist", "" }, +{ 0x2204, "nexist", "THERE DOES NOT EXIST" }, +{ 0x2205, "empty", "" }, +{ 0x2207, "nabla", "NABLA" }, +{ 0x2209, "notin", "" }, +{ 0x220A, "epsi", "" }, +{ 0x220A, "epsis", "" }, +{ 0x220A, "isin", "" }, +{ 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" }, +{ 0x220D, "ni", "" }, +{ 0x220F, "prod", "N-ARY PRODUCT" }, +{ 0x2210, "amalg", "N-ARY COPRODUCT" }, +{ 0x2210, "coprod", "N-ARY COPRODUCT" }, +{ 0x2210, "samalg", "" }, +{ 0x2211, "sum", "N-ARY SUMMATION" }, +{ 0x2212, "minus", "MINUS SIGN" }, +{ 0x2213, "mnplus", "" }, +{ 0x2214, "plusdo", "DOT PLUS" }, +{ 0x2216, "setmn", "SET MINUS" }, +{ 0x2216, "ssetmn", "SET MINUS" }, +{ 0x2217, "lowast", "ASTERISK OPERATOR" }, +{ 0x2218, "compfn", "RING OPERATOR" }, +{ 0x221A, "radic", "" }, +{ 0x221D, "prop", "" }, +{ 0x221D, "vprop", "" }, +{ 0x221E, "infin", "" }, +{ 0x221F, "ang90", "RIGHT ANGLE" }, +{ 0x2220, "ang", "ANGLE" }, +{ 0x2221, "angmsd", "MEASURED ANGLE" }, +{ 0x2222, "angsph", "" }, +{ 0x2223, "mid", "" }, +{ 0x2224, "nmid", "DOES NOT DIVIDE" }, +{ 0x2225, "par", "PARALLEL TO" }, +{ 0x2225, "spar", "PARALLEL TO" }, +{ 0x2226, "npar", "NOT PARALLEL TO" }, +{ 0x2226, "nspar", "NOT PARALLEL TO" }, +{ 0x2227, "and", "" }, +{ 0x2228, "or", "" }, +{ 0x2229, "cap", "" }, +{ 0x222A, "cup", "" }, +{ 0x222B, "int", "" }, +{ 0x222E, "conint", "" }, +{ 0x2234, "there4", "" }, +{ 0x2235, "becaus", "BECAUSE" }, +{ 0x223C, "sim", "" }, +{ 0x223C, "thksim", "TILDE OPERATOR" }, +{ 0x223D, "bsim", "" }, +{ 0x2240, "wreath", "WREATH PRODUCT" }, +{ 0x2241, "nsim", "" }, +{ 0x2243, "sime", "" }, +{ 0x2244, "nsime", "" }, +{ 0x2245, "cong", "" }, +{ 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" }, +{ 0x2248, "ap", "" }, +{ 0x2248, "thkap", "ALMOST EQUAL TO" }, +{ 0x2249, "nap", "NOT 
ALMOST EQUAL TO" }, +{ 0x224A, "ape", "" }, +{ 0x224C, "bcong", "ALL EQUAL TO" }, +{ 0x224D, "asymp", "EQUIVALENT TO" }, +{ 0x224E, "bump", "" }, +{ 0x224F, "bumpe", "" }, +{ 0x2250, "esdot", "" }, +{ 0x2251, "eDot", "" }, +{ 0x2252, "efDot", "" }, +{ 0x2253, "erDot", "" }, +{ 0x2254, "colone", "" }, +{ 0x2255, "ecolon", "" }, +{ 0x2256, "ecir", "" }, +{ 0x2257, "cire", "" }, +{ 0x2259, "wedgeq", "ESTIMATES" }, +{ 0x225C, "trie", "" }, +{ 0x2260, "ne", "" }, +{ 0x2261, "equiv", "" }, +{ 0x2262, "nequiv", "NOT IDENTICAL TO" }, +{ 0x2264, "le", "" }, +{ 0x2264, "les", "LESS-THAN OR EQUAL TO" }, +{ 0x2265, "ge", "GREATER-THAN OR EQUAL TO" }, +{ 0x2265, "ges", "GREATER-THAN OR EQUAL TO" }, +{ 0x2266, "lE", "" }, +{ 0x2267, "gE", "" }, +{ 0x2268, "lnE", "" }, +{ 0x2268, "lne", "" }, +{ 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" }, +{ 0x2269, "gnE", "" }, +{ 0x2269, "gne", "" }, +{ 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" }, +{ 0x226A, "Lt", "MUCH LESS-THAN" }, +{ 0x226B, "Gt", "MUCH GREATER-THAN" }, +{ 0x226C, "twixt", "BETWEEN" }, +{ 0x226E, "nlt", "NOT LESS-THAN" }, +{ 0x226F, "ngt", "NOT GREATER-THAN" }, +{ 0x2270, "nlE", "" }, +{ 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" }, +{ 0x2270, "nles", "" }, +{ 0x2271, "ngE", "" }, +{ 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" }, +{ 0x2271, "nges", "" }, +{ 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" }, +{ 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" }, +{ 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" }, +{ 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" }, +{ 0x2276, "lg", "LESS-THAN OR GREATER-THAN" }, +{ 0x2277, "gl", "" }, +{ 0x227A, "pr", "" }, +{ 0x227B, "sc", "" }, +{ 0x227C, "cupre", "" }, +{ 0x227C, "pre", "" }, +{ 0x227D, "sccue", "" }, +{ 0x227D, "sce", "" }, +{ 0x227E, "prap", "" }, +{ 0x227E, "prsim", "" }, +{ 0x227F, "scap", "" }, +{ 0x227F, "scsim", "" }, +{ 0x2280, "npr", "DOES NOT PRECEDE" }, +{ 0x2281, "nsc", "DOES NOT SUCCEED" }, +{ 0x2282, "sub", "" }, +{ 0x2283, "sup", "" }, +{ 
0x2284, "nsub", "NOT A SUBSET OF" }, +{ 0x2285, "nsup", "NOT A SUPERSET OF" }, +{ 0x2286, "subE", "" }, +{ 0x2286, "sube", "" }, +{ 0x2287, "supE", "" }, +{ 0x2287, "supe", "" }, +{ 0x2288, "nsubE", "" }, +{ 0x2288, "nsube", "" }, +{ 0x2289, "nsupE", "" }, +{ 0x2289, "nsupe", "" }, +{ 0x228A, "subne", "" }, +{ 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" }, +{ 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" }, +{ 0x228B, "supnE", "" }, +{ 0x228B, "supne", "" }, +{ 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" }, +{ 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" }, +{ 0x228E, "uplus", "MULTISET UNION" }, +{ 0x228F, "sqsub", "" }, +{ 0x2290, "sqsup", "" }, +{ 0x2291, "sqsube", "" }, +{ 0x2292, "sqsupe", "" }, +{ 0x2293, "sqcap", "SQUARE CAP" }, +{ 0x2294, "sqcup", "SQUARE CUP" }, +{ 0x2295, "oplus", "CIRCLED PLUS" }, +{ 0x2296, "ominus", "CIRCLED MINUS" }, +{ 0x2297, "otimes", "CIRCLED TIMES" }, +{ 0x2298, "osol", "CIRCLED DIVISION SLASH" }, +{ 0x2299, "odot", "CIRCLED DOT OPERATOR" }, +{ 0x229A, "ocir", "CIRCLED RING OPERATOR" }, +{ 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" }, +{ 0x229D, "odash", "CIRCLED DASH" }, +{ 0x229E, "plusb", "SQUARED PLUS" }, +{ 0x229F, "minusb", "SQUARED MINUS" }, +{ 0x22A0, "timesb", "SQUARED TIMES" }, +{ 0x22A1, "sdotb", "SQUARED DOT OPERATOR" }, +{ 0x22A2, "vdash", "" }, +{ 0x22A3, "dashv", "" }, +{ 0x22A4, "top", "DOWN TACK" }, +{ 0x22A5, "bottom", "" }, +{ 0x22A5, "perp", "" }, +{ 0x22A7, "models", "MODELS" }, +{ 0x22A8, "vDash", "" }, +{ 0x22A9, "Vdash", "" }, +{ 0x22AA, "Vvdash", "" }, +{ 0x22AC, "nvdash", "DOES NOT PROVE" }, +{ 0x22AD, "nvDash", "NOT TRUE" }, +{ 0x22AE, "nVdash", "DOES NOT FORCE" }, +{ 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" }, +{ 0x22B2, "vltri", "" }, +{ 0x22B3, "vrtri", "" }, +{ 0x22B4, "ltrie", "" }, +{ 0x22B5, "rtrie", "" }, +{ 0x22B8, "mumap", "MULTIMAP" }, +{ 0x22BA, "intcal", "INTERCALATE" }, +{ 0x22BB, "veebar", "" }, +{ 0x22BC, "barwed", "NAND" }, +{ 0x22C4, 
"diam", "DIAMOND OPERATOR" }, +{ 0x22C5, "sdot", "DOT OPERATOR" }, +{ 0x22C6, "sstarf", "STAR OPERATOR" }, +{ 0x22C6, "star", "STAR OPERATOR" }, +{ 0x22C7, "divonx", "DIVISION TIMES" }, +{ 0x22C8, "bowtie", "" }, +{ 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" }, +{ 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" }, +{ 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" }, +{ 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" }, +{ 0x22CD, "bsime", "" }, +{ 0x22CE, "cuvee", "CURLY LOGICAL OR" }, +{ 0x22CF, "cuwed", "CURLY LOGICAL AND" }, +{ 0x22D0, "Sub", "" }, +{ 0x22D1, "Sup", "" }, +{ 0x22D2, "Cap", "DOUBLE INTERSECTION" }, +{ 0x22D3, "Cup", "DOUBLE UNION" }, +{ 0x22D4, "fork", "" }, +{ 0x22D6, "ldot", "" }, +{ 0x22D7, "gsdot", "" }, +{ 0x22D8, "Ll", "" }, +{ 0x22D9, "Gg", "VERY MUCH GREATER-THAN" }, +{ 0x22DA, "lEg", "" }, +{ 0x22DA, "leg", "" }, +{ 0x22DB, "gEl", "" }, +{ 0x22DB, "gel", "" }, +{ 0x22DC, "els", "" }, +{ 0x22DD, "egs", "" }, +{ 0x22DE, "cuepr", "" }, +{ 0x22DF, "cuesc", "" }, +{ 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" }, +{ 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" }, +{ 0x22E6, "lnsim", "" }, +{ 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" }, +{ 0x22E8, "prnap", "" }, +{ 0x22E8, "prnsim", "" }, +{ 0x22E9, "scnap", "" }, +{ 0x22E9, "scnsim", "" }, +{ 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" }, +{ 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" }, +{ 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" }, +{ 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" }, +{ 0x22EE, "vellip", "" }, +{ 0x2306, "Barwed", "PERSPECTIVE" }, +{ 0x2308, "lceil", "LEFT CEILING" }, +{ 0x2309, "rceil", "RIGHT CEILING" }, +{ 0x230A, "lfloor", "LEFT FLOOR" }, +{ 0x230B, "rfloor", "RIGHT FLOOR" }, +{ 0x230C, "drcrop", "BOTTOM RIGHT CROP" }, +{ 0x230D, "dlcrop", "BOTTOM LEFT CROP" }, +{ 0x230E, "urcrop", "TOP RIGHT CROP" }, +{ 0x230F, "ulcrop", "TOP LEFT CROP" }, +{ 0x2315, "telrec", "TELEPHONE RECORDER" }, +{ 0x2316, 
"target", "POSITION INDICATOR" }, +{ 0x231C, "ulcorn", "TOP LEFT CORNER" }, +{ 0x231D, "urcorn", "TOP RIGHT CORNER" }, +{ 0x231E, "dlcorn", "BOTTOM LEFT CORNER" }, +{ 0x231F, "drcorn", "BOTTOM RIGHT CORNER" }, +{ 0x2322, "frown", "" }, +{ 0x2322, "sfrown", "FROWN" }, +{ 0x2323, "smile", "" }, +{ 0x2323, "ssmile", "SMILE" }, +{ 0x2423, "blank", "OPEN BOX" }, +{ 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" }, +{ 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" }, +{ 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" }, +{ 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" }, +{ 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" }, +{ 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" }, +{ 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" }, +{ 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" }, +{ 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" }, +{ 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" }, +{ 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" }, +{ 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" }, +{ 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" }, +{ 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" }, +{ 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" }, +{ 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" }, +{ 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" }, +{ 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" }, +{ 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" }, +{ 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" }, +{ 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" }, +{ 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" }, +{ 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" }, +{ 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" }, +{ 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" }, +{ 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" }, +{ 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT 
DOUBLE" }, +{ 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" }, +{ 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" }, +{ 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" }, +{ 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" }, +{ 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" }, +{ 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" }, +{ 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" }, +{ 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" }, +{ 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" }, +{ 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" }, +{ 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" }, +{ 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" }, +{ 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" }, +{ 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" }, +{ 0x2580, "uhblk", "UPPER HALF BLOCK" }, +{ 0x2584, "lhblk", "LOWER HALF BLOCK" }, +{ 0x2588, "block", "FULL BLOCK" }, +{ 0x2591, "blk14", "LIGHT SHADE" }, +{ 0x2592, "blk12", "MEDIUM SHADE" }, +{ 0x2593, "blk34", "DARK SHADE" }, +{ 0x25A1, "square", "WHITE SQUARE" }, +{ 0x25A1, "squ", "WHITE SQUARE" }, +{ 0x25AA, "squf", "" }, +{ 0x25AD, "rect", "WHITE RECTANGLE" }, +{ 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" }, +{ 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" }, +{ 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" }, +{ 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" }, +{ 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" }, +{ 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" }, +{ 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" }, +{ 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" }, +{ 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" }, +{ 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" }, +{ 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" }, +{ 0x25CA, "loz", "LOZENGE" }, +{ 0x25CB, "cir", "WHITE CIRCLE" 
}, +{ 0x25CB, "xcirc", "WHITE CIRCLE" }, +{ 0x2605, "starf", "BLACK STAR" }, +{ 0x260E, "phone", "TELEPHONE SIGN" }, +{ 0x2640, "female", "" }, +{ 0x2642, "male", "MALE SIGN" }, +{ 0x2660, "spades", "BLACK SPADE SUIT" }, +{ 0x2663, "clubs", "BLACK CLUB SUIT" }, +{ 0x2665, "hearts", "BLACK HEART SUIT" }, +{ 0x2666, "diams", "BLACK DIAMOND SUIT" }, +{ 0x2669, "sung", "" }, +{ 0x266D, "flat", "MUSIC FLAT SIGN" }, +{ 0x266E, "natur", "MUSIC NATURAL SIGN" }, +{ 0x266F, "sharp", "MUSIC SHARP SIGN" }, +{ 0x2713, "check", "CHECK MARK" }, +{ 0x2717, "cross", "BALLOT X" }, +{ 0x2720, "malt", "MALTESE CROSS" }, +{ 0x2726, "lozf", "" }, +{ 0x2736, "sext", "SIX POINTED BLACK STAR" }, +{ 0x3008, "lang", "" }, +{ 0x3009, "rang", "" }, +{ 0xE291, "rpargt", "" }, +{ 0xE2A2, "lnap", "" }, +{ 0xE2AA, "nsmid", "" }, +{ 0xE2B3, "prnE", "" }, +{ 0xE2B5, "scnE", "" }, +{ 0xE2B8, "vsubnE", "" }, +{ 0xE301, "smid", "" }, +{ 0xE411, "gnap", "" }, +{ 0xFB00, "fflig", "" }, +{ 0xFB01, "filig", "" }, +{ 0xFB02, "fllig", "" }, +{ 0xFB03, "ffilig", "" }, +{ 0xFB04, "ffllig", "" }, +{ 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" }, +}; + +/************************************************************************ + * * + * Commodity functions to handle entities * + * * + ************************************************************************/ + +/* + * Macro used to grow the current buffer. + */ +#define growBuffer(buffer) { \ + buffer##_size *= 2; \ + buffer = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + if (buffer == NULL) { \ + xmlGenericError(xmlGenericErrorContext, "realloc failed"); \ + return(NULL); \ + } \ +} + +/** + * docbEntityLookup: + * @name: the entity name + * + * Lookup the given entity in EntitiesTable + * + * TODO: the linear scan is really ugly, an hash table is really needed. + * + * Returns the associated docbEntityDescPtr if found, NULL otherwise. 
+ */ +static docbEntityDescPtr +docbEntityLookup(const xmlChar *name) { + unsigned int i; + + for (i = 0;i < (sizeof(docbookEntitiesTable)/ + sizeof(docbookEntitiesTable[0]));i++) { + if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name); +#endif + return(&docbookEntitiesTable[i]); + } + } + return(NULL); +} + +/** + * docbEntityValueLookup: + * @value: the entity's unicode value + * + * Lookup the given entity in EntitiesTable + * + * TODO: the linear scan is really ugly, an hash table is really needed. + * + * Returns the associated docbEntityDescPtr if found, NULL otherwise. + */ +static docbEntityDescPtr +docbEntityValueLookup(int value) { + unsigned int i; +#ifdef DEBUG + int lv = 0; +#endif + + for (i = 0;i < (sizeof(docbookEntitiesTable)/ + sizeof(docbookEntitiesTable[0]));i++) { + if (docbookEntitiesTable[i].value >= value) { + if (docbookEntitiesTable[i].value > value) + break; +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name); +#endif + return(&docbookEntitiesTable[i]); + } +#ifdef DEBUG + if (lv > docbookEntitiesTable[i].value) { + xmlGenericError(xmlGenericErrorContext, + "docbookEntitiesTable[] is not sorted (%d > %d)!\n", + lv, docbookEntitiesTable[i].value); + } + lv = docbookEntitiesTable[i].value; +#endif + } + return(NULL); +} + +#if 0 +/** + * UTF8ToSgml: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * plus SGML entities block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. 
* The value of @outlen after return is the number of octets written
 * to @out.
 */
/*
 * NOTE: this function sits between #if 0 and #endif, so it is not
 * compiled.
 */
int
UTF8ToSgml(unsigned char* out, int *outlen,
           const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;   /* end of the last fully converted char */
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;                          /* continuation bytes still expected */

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        /* Decode the lead byte: payload bits and number of followers. */
        d = *in++;
        if (d < 0x80) { c= d; trailing= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }

        /* Sequence is cut by the end of input: stop, keep what was done. */
        if (inend - in < trailing) {
            break;
        }

        /*
         * Fold the continuation bytes in.  A byte that is not of the
         * form 10xxxxxx only ends this loop: no error is returned and c
         * is used as accumulated so far.
         */
        for ( ; trailing; trailing--) {
            if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                break;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            /* NOTE(review): ">=" stops with one byte of @out still unused */
            if (out + 1 >= outend)
                break;
            *out++ = c;
        } else {
            int len;
            docbEntityDescPtr ent;

            /*
             * Try to lookup a predefined SGML entity for it
             */

            ent = docbEntityValueLookup(c);
            if (ent == NULL) {
                /* no chance for this in Ascii */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            len = strlen(ent->name);
            /* room needed: '&' + name + ';' */
            if (out + 2 + len >= outend)
                break;
            *out++ = '&';
            memcpy(out, ent->name, len);
            out += len;
            *out++ = ';';
        }
        /* Only now is the character counted as consumed. */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(0);
}
#endif

/**
 * docbEncodeEntities:
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a
pointer to an array of UTF-8 chars + * @inlen: the length of @in + * @quoteChar: the quote character to escape (' or ") or zero. + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * plus SGML entities block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. + */ +int +docbEncodeEntities(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen, int quoteChar) { + const unsigned char* processed = in; + const unsigned char* outend = out + (*outlen); + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend = in + (*inlen); + unsigned int c, d; + int trailing; + + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) + break; + + while (trailing--) { + if (((d= *in++) & 0xC0) != 0x80) { + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x80 && c != (unsigned int) quoteChar && c != '&' && c != '<' && c != '>') { + if (out >= outend) + break; + *out++ = c; + } else { + docbEntityDescPtr ent; + const char *cp; + char nbuf[16]; + int len; + + /* + * Try to lookup a predefined SGML entity for it + */ + ent = docbEntityValueLookup(c); + if (ent == NULL) { + snprintf(nbuf, sizeof(nbuf), 
"#%u", c); + cp = nbuf; + } + else + cp = ent->name; + len = strlen(cp); + if (out + 2 + len > outend) + break; + *out++ = '&'; + memcpy(out, cp, len); + out += len; + *out++ = ';'; + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + + +/************************************************************************ + * * + * Commodity functions to handle streams * + * * + ************************************************************************/ + +/** + * docbNewInputStream: + * @ctxt: an SGML parser context + * + * Create a new input stream structure + * Returns the new input stream or NULL + */ +static docbParserInputPtr +docbNewInputStream(docbParserCtxtPtr ctxt) { + docbParserInputPtr input; + + input = (xmlParserInputPtr) xmlMalloc(sizeof(docbParserInput)); + if (input == NULL) { + ctxt->errNo = XML_ERR_NO_MEMORY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "malloc: couldn't allocate a new input stream\n"); + return(NULL); + } + memset(input, 0, sizeof(docbParserInput)); + input->filename = NULL; + input->directory = NULL; + input->base = NULL; + input->cur = NULL; + input->buf = NULL; + input->line = 1; + input->col = 1; + input->buf = NULL; + input->free = NULL; + input->version = NULL; + input->consumed = 0; + input->length = 0; + return(input); +} + + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? * + * * + ************************************************************************/ + +/** + * areBlanks: + * @ctxt: an SGML parser context + * @str: a xmlChar * + * @len: the size of @str + * + * Is this a sequence of blank chars that one can ignore ? + * + * Returns 1 if ignorable 0 otherwise. 
+ */ + +static int areBlanks(docbParserCtxtPtr ctxt, const xmlChar *str, int len) { + int i; + xmlNodePtr lastChild; + + for (i = 0;i < len;i++) + if (!(IS_BLANK(str[i]))) return(0); + + if (CUR == 0) return(1); + if (CUR != '<') return(0); + if (ctxt->name == NULL) + return(1); + if (ctxt->node == NULL) return(0); + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if ((ctxt->node->type != XML_ELEMENT_NODE) && + (ctxt->node->content != NULL)) return(0); + } else if (xmlNodeIsText(lastChild)) + return(0); + return(1); +} + +/************************************************************************ + * * + * External entities support * + * * + ************************************************************************/ + +/** + * docbParseCtxtExternalEntity: + * @ctx: the existing parsing context + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @list: the return value for the set of parsed nodes + * + * Parse an external general entity within an existing parsing context + * + * Returns 0 if the entity is well formed, -1 in case of args problem and + * the parser error code otherwise + */ + +static int +docbParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, + const xmlChar *ID, xmlNodePtr *list) { + xmlParserCtxtPtr ctxt; + xmlDocPtr newDoc; + xmlSAXHandlerPtr oldsax = NULL; + int ret = 0; + + if (ctx->depth > 40) { + return(XML_ERR_ENTITY_LOOP); + } + + if (list != NULL) + *list = NULL; + if ((URL == NULL) && (ID == NULL)) + return(-1); + if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ + return(-1); + + + ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL); + if (ctxt == NULL) return(-1); + ctxt->userData = ctxt; + oldsax = ctxt->sax; + ctxt->sax = ctx->sax; + newDoc = xmlNewDoc(BAD_CAST "1.0"); + if (newDoc == NULL) { + xmlFreeParserCtxt(ctxt); + return(-1); + } + if (ctx->myDoc != NULL) { + newDoc->intSubset = ctx->myDoc->intSubset; + newDoc->extSubset = 
ctx->myDoc->extSubset; + } + if (ctx->myDoc->URL != NULL) { + newDoc->URL = xmlStrdup(ctx->myDoc->URL); + } + newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); + if (newDoc->children == NULL) { + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + newDoc->intSubset = NULL; + newDoc->extSubset = NULL; + xmlFreeDoc(newDoc); + return(-1); + } + nodePush(ctxt, newDoc->children); + if (ctx->myDoc == NULL) { + ctxt->myDoc = newDoc; + } else { + ctxt->myDoc = ctx->myDoc; + newDoc->children->doc = ctx->myDoc; + } + + /* + * Parse a possible text declaration first + */ + GROW; + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + } + + /* + * Doing validity checking on chunk doesn't make sense + */ + ctxt->instate = XML_PARSER_CONTENT; + ctxt->validate = ctx->validate; + ctxt->loadsubset = ctx->loadsubset; + ctxt->depth = ctx->depth + 1; + ctxt->replaceEntities = ctx->replaceEntities; + if (ctxt->validate) { + ctxt->vctxt.error = ctx->vctxt.error; + ctxt->vctxt.warning = ctx->vctxt.warning; + /* Allocate the Node stack */ + ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr)); + if (ctxt->vctxt.nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "docbParseCtxtExternalEntity: out of memory\n"); + ctxt->validate = 0; + ctxt->vctxt.error = NULL; + ctxt->vctxt.warning = NULL; + } else { + ctxt->vctxt.nodeNr = 0; + ctxt->vctxt.nodeMax = 4; + ctxt->vctxt.node = NULL; + } + } else { + ctxt->vctxt.error = NULL; + ctxt->vctxt.warning = NULL; + } + + docbParseContent(ctxt); + + if ((RAW == '<') && (NXT(1) == '/')) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (RAW != 0) { + ctxt->errNo = XML_ERR_EXTRA_CONTENT; + if ((ctxt->sax 
!= NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "extra content at the end of well balanced chunk\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (ctxt->node != newDoc->children) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + if (!ctxt->wellFormed) { + if (ctxt->errNo == 0) + ret = 1; + else + ret = ctxt->errNo; + } else { + if (list != NULL) { + xmlNodePtr cur; + + /* + * Return the newly created nodeset after unlinking it from + * they pseudo parent. + */ + cur = newDoc->children->children; + *list = cur; + while (cur != NULL) { + cur->parent = NULL; + cur = cur->next; + } + newDoc->children->children = NULL; + } + ret = 0; + } + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + newDoc->intSubset = NULL; + newDoc->extSubset = NULL; + xmlFreeDoc(newDoc); + + return(ret); +} + +/************************************************************************ + * * + * The parser itself * + * * + ************************************************************************/ + +/** + * docbParseSGMLName: + * @ctxt: an SGML parser context + * + * parse an SGML tag or attribute name, note that we convert it to lowercase + * since SGML names are not case-sensitive. 
+ * + * Returns the Tag Name parsed or NULL + */ + +static xmlChar * +docbParseSGMLName(docbParserCtxtPtr ctxt) { + xmlChar *ret = NULL; + int i = 0; + xmlChar loc[DOCB_PARSER_BUFFER_SIZE]; + + if (!IS_LETTER(CUR) && (CUR != '_') && + (CUR != ':')) return(NULL); + + while ((i < DOCB_PARSER_BUFFER_SIZE) && + ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == ':') || (CUR == '_'))) { + if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; + else loc[i] = CUR; + i++; + + NEXT; + } + + ret = xmlStrndup(loc, i); + + return(ret); +} + +/** + * docbParseName: + * @ctxt: an SGML parser context + * + * parse an SGML name, this routine is case sensitive. + * + * Returns the Name parsed or NULL + */ + +static xmlChar * +docbParseName(docbParserCtxtPtr ctxt) { + xmlChar buf[DOCB_MAX_NAMELEN]; + int len = 0; + + GROW; + if (!IS_LETTER(CUR) && (CUR != '_')) { + return(NULL); + } + + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || (CUR == ':') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) { + buf[len++] = CUR; + NEXT; + if (len >= DOCB_MAX_NAMELEN) { + xmlGenericError(xmlGenericErrorContext, + "docbParseName: reached DOCB_MAX_NAMELEN limit\n"); + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || (CUR == ':') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) + NEXT; + break; + } + } + return(xmlStrndup(buf, len)); +} + +/** + * docbParseSGMLAttribute: + * @ctxt: an SGML parser context + * @stop: a char stop value + * + * parse an SGML attribute value till the stop (quote), if + * stop is 0 then it stops at the first space + * + * Returns the attribute parsed or NULL + */ + +static xmlChar * +docbParseSGMLAttribute(docbParserCtxtPtr ctxt, const xmlChar stop) { + xmlChar *buffer = NULL; + int buffer_size = 0; + xmlChar *out = NULL; + xmlChar *name = NULL; + + xmlChar *cur = NULL; + xmlEntityPtr xent; + docbEntityDescPtr ent; + + /* + * allocate a translation buffer. 
+ */ + buffer_size = DOCB_PARSER_BIG_BUFFER_SIZE; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "docbParseSGMLAttribute: malloc failed"); + return(NULL); + } + out = buffer; + + /* + * Ok loop until we reach one of the ending chars + */ + while ((CUR != 0) && (CUR != stop) && (CUR != '>')) { + if ((stop == 0) && (IS_BLANK(CUR))) break; + if (CUR == '&') { + if (NXT(1) == '#') { + unsigned int c; + int bits; + + c = docbParseCharRef(ctxt); + if (c < 0x80) + { *out++ = c; bits= -6; } + else if (c < 0x800) + { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + *out++ = ((c >> bits) & 0x3F) | 0x80; + } + } else { + xent = docbParseEntityRef(ctxt, &name); + if (name == NULL) { + *out++ = '&'; + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + *out++ = '&'; + } else { + ent = docbEntityLookup(name); + if (ent == NULL) { + *out++ = '&'; + cur = name; + while (*cur != 0) { + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + *out++ = *cur++; + } + xmlFree(name); + } else { + unsigned int c; + int bits; + + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + c = (xmlChar)ent->value; + if (c < 0x80) + { *out++ = c; bits= -6; } + else if (c < 0x800) + { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + *out++ = ((c >> bits) & 0x3F) | 0x80; + } + xmlFree(name); + } + } + } + } else { + unsigned int c; + int bits; + + if (out - buffer > buffer_size - 100) { + int indx 
= out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + c = CUR; + if (c < 0x80) + { *out++ = c; bits= -6; } + else if (c < 0x800) + { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + *out++ = ((c >> bits) & 0x3F) | 0x80; + } + NEXT; + } + } + *out++ = 0; + return(buffer); +} + + +/** + * docbParseEntityRef: + * @ctxt: an SGML parser context + * @str: location to store the entity name + * + * parse an SGML ENTITY references + * + * [68] EntityRef ::= '&' Name ';' + * + * Returns the associated xmlEntityPtr if found, or NULL otherwise, + * if non-NULL *str will have to be freed by the caller. + */ +static xmlEntityPtr +docbParseEntityRef(docbParserCtxtPtr ctxt, xmlChar **str) { + xmlChar *name; + xmlEntityPtr ent = NULL; + *str = NULL; + + if (CUR == '&') { + NEXT; + name = docbParseName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseEntityRef: no name\n"); + ctxt->wellFormed = 0; + } else { + GROW; + if (CUR == ';') { + *str = name; + + /* + * Ask first SAX for entity resolution, otherwise try the + * predefined set. + */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if (ent == NULL) + ent = xmlGetPredefinedEntity(name); + } + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseEntityRef: expecting ';'\n"); + *str = name; + } + } + } + return(ent); +} + +/** + * docbParseAttValue: + * @ctxt: an SGML parser context + * + * parse a value for an attribute + * Note: the parser won't do substitution of entities here, this + * will be handled later in xmlStringGetNodeList, unless it was + * asked for ctxt->replaceEntities != 0 + * + * Returns the AttValue parsed or NULL. 
+ */ + +static xmlChar * +docbParseAttValue(docbParserCtxtPtr ctxt) { + xmlChar *ret = NULL; + + if (CUR == '"') { + NEXT; + ret = docbParseSGMLAttribute(ctxt, '"'); + if (CUR != '"') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); + ctxt->wellFormed = 0; + } else + NEXT; + } else if (CUR == '\'') { + NEXT; + ret = docbParseSGMLAttribute(ctxt, '\''); + if (CUR != '\'') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); + ctxt->wellFormed = 0; + } else + NEXT; + } else { + /* + * That's an SGMLism, the attribute value may not be quoted + */ + ret = docbParseSGMLAttribute(ctxt, 0); + if (ret == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: no value found\n"); + ctxt->wellFormed = 0; + } + } + return(ret); +} + +/** + * docbParseSystemLiteral: + * @ctxt: an SGML parser context + * + * parse an SGML Literal + * + * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") + * + * Returns the SystemLiteral parsed or NULL + */ + +static xmlChar * +docbParseSystemLiteral(docbParserCtxtPtr ctxt) { + const xmlChar *q; + xmlChar *ret = NULL; + + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) + NEXT; + if (!IS_CHAR(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_CHAR(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + 
ctxt->sax->error(ctxt->userData, + "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + } + + return(ret); +} + +/** + * docbParsePubidLiteral: + * @ctxt: an SGML parser context + * + * parse an SGML public literal + * + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + * + * Returns the PubidLiteral parsed or NULL. + */ + +static xmlChar * +docbParsePubidLiteral(docbParserCtxtPtr ctxt) { + const xmlChar *q; + xmlChar *ret = NULL; + /* + * Name ::= (Letter | '_') (NameChar)* + */ + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while (IS_PUBIDCHAR(CUR)) NEXT; + if (CUR != '"') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_LETTER(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_LETTER(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + } + + return(ret); +} + +/** + * docbParseCharData: + * @ctxt: an SGML parser context + * @cdata: int indicating whether we are within a CDATA section + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. 
+ * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +static void +docbParseCharData(docbParserCtxtPtr ctxt) { + xmlChar buf[DOCB_PARSER_BIG_BUFFER_SIZE + 5]; + int nbchar = 0; + int cur, l; + + SHRINK; + cur = CUR_CHAR(l); + while (((cur != '<') || (ctxt->token == '<')) && + ((cur != '&') || (ctxt->token == '&')) && + (IS_CHAR(cur))) { + COPY_BUF(l,buf,nbchar,cur); + if (nbchar >= DOCB_PARSER_BIG_BUFFER_SIZE) { + /* + * Ok the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + buf, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + nbchar = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + } + if (nbchar != 0) { + /* + * Ok the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + } +} + +/** + * docbParseExternalID: + * @ctxt: an SGML parser context + * @publicID: a xmlChar** receiving PubidLiteral + * + * Parse an External ID or a Public ID + * + * Returns the function returns SystemLiteral and in the second + * case publicID receives PubidLiteral, + * it is possible to return NULL and have publicID set. 
+ */ + +static xmlChar * +docbParseExternalID(docbParserCtxtPtr ctxt, xmlChar **publicID) { + xmlChar *URI = NULL; + + if ((UPPER == 'S') && (UPP(1) == 'Y') && + (UPP(2) == 'S') && (UPP(3) == 'T') && + (UPP(4) == 'E') && (UPP(5) == 'M')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'SYSTEM'\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + URI = docbParseSystemLiteral(ctxt); + if (URI == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseExternalID: SYSTEM, no URI\n"); + ctxt->wellFormed = 0; + } + } else if ((UPPER == 'P') && (UPP(1) == 'U') && + (UPP(2) == 'B') && (UPP(3) == 'L') && + (UPP(4) == 'I') && (UPP(5) == 'C')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'PUBLIC'\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + *publicID = docbParsePubidLiteral(ctxt); + if (*publicID == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseExternalID: PUBLIC, no Public Identifier\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + if ((CUR == '"') || (CUR == '\'')) { + URI = docbParseSystemLiteral(ctxt); + } + } + return(URI); +} + +/** + * docbParsePI: + * @ctxt: an XML parser context + * + * parse an XML Processing Instruction. + * + * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' + * + * The processing is transfered to SAX once parsed. 
+ */ + +static void +docbParsePI(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = DOCB_PARSER_BUFFER_SIZE; + int cur, l; + xmlChar *target; + xmlParserInputState state; + int count = 0; + + if ((RAW == '<') && (NXT(1) == '?')) { + xmlParserInputPtr input = ctxt->input; + state = ctxt->instate; + ctxt->instate = XML_PARSER_PI; + /* + * this is a Processing Instruction. + */ + SKIP(2); + SHRINK; + + /* + * Parse the target name and check for special support like + * namespace. + */ + target = xmlParseName(ctxt); + if (target != NULL) { + xmlChar *encoding = NULL; + + if ((RAW == '?') && (NXT(1) == '>')) { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PI declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP(2); + + /* + * SAX: PI detected. + */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->processingInstruction != NULL)) + ctxt->sax->processingInstruction(ctxt->userData, + target, NULL); + ctxt->instate = state; + xmlFree(target); + return; + } + if (xmlStrEqual(target, BAD_CAST "sgml-declaration")) { + + encoding = xmlParseEncodingDecl(ctxt); + if (encoding == NULL) { + xmlGenericError(xmlGenericErrorContext, + "sgml-declaration: failed to find/handle encoding\n"); +#ifdef DEBUG + } else { + xmlGenericError(xmlGenericErrorContext, + "switched to encoding %s\n", encoding); +#endif + } + + } + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + cur = CUR; + if (encoding != NULL) { + len = snprintf((char *) buf, size - 1, + " encoding = \"%s\"", encoding); + if (len < 0) + len = size; + } else { + if (!IS_BLANK(cur)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) 
&& (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParsePI: PI %s space expected\n", target); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + } + cur = CUR_CHAR(l); + while (IS_CHAR(cur) && /* checked */ + ((cur != '?') || (NXT(1) != '>'))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + count++; + if (count > 50) { + GROW; + count = 0; + } + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (cur != '?') { + ctxt->errNo = XML_ERR_PI_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParsePI: PI %s never end ...\n", target); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PI declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP(2); + + /* + * SAX: PI detected. + */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->processingInstruction != NULL)) + ctxt->sax->processingInstruction(ctxt->userData, + target, buf); + } + xmlFree(buf); + xmlFree(target); + } else { + ctxt->errNo = XML_ERR_PI_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParsePI : no target name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + ctxt->instate = state; + } +} + +/** + * docbParseComment: + * @ctxt: an SGML parser context + * + * Parse an XML (SGML) comment <!-- .... 
--> + * + * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' + */ +static void +docbParseComment(docbParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len; + int size = DOCB_PARSER_BUFFER_SIZE; + int q, ql; + int r, rl; + int cur, l; + xmlParserInputState state; + + /* + * Check that there is a comment right here. + */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + SHRINK; + SKIP(4); + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + q = CUR_CHAR(ql); + NEXTL(ql); + r = CUR_CHAR(rl); + NEXTL(rl); + cur = CUR_CHAR(l); + len = 0; + while (IS_CHAR(cur) && + ((cur != '>') || + (r != '-') || (q != '-'))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + COPY_BUF(ql,buf,len,q); + q = r; + ql = rl; + r = cur; + rl = l; + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment not terminated \n<!--%.50s\n", buf); + ctxt->wellFormed = 0; + xmlFree(buf); + } else { + NEXT; + if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->comment(ctxt->userData, buf); + xmlFree(buf); + } + ctxt->instate = state; +} + +/** + * docbParseCharRef: + * @ctxt: an SGML parser context + * + * parse Reference declarations + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * Returns the value parsed (as an int) + */ +static int 
+docbParseCharRef(docbParserCtxtPtr ctxt) { + int val = 0; + + if ((CUR == '&') && (NXT(1) == '#') && + (NXT(2) == 'x')) { + SKIP(3); + while (CUR != ';') { + if ((CUR >= '0') && (CUR <= '9')) + val = val * 16 + (CUR - '0'); + else if ((CUR >= 'a') && (CUR <= 'f')) + val = val * 16 + (CUR - 'a') + 10; + else if ((CUR >= 'A') && (CUR <= 'F')) + val = val * 16 + (CUR - 'A') + 10; + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseCharRef: invalid hexadecimal value\n"); + ctxt->wellFormed = 0; + val = 0; + break; + } + NEXT; + } + if (CUR == ';') + NEXT; + } else if ((CUR == '&') && (NXT(1) == '#')) { + SKIP(2); + while (CUR != ';') { + if ((CUR >= '0') && (CUR <= '9')) + val = val * 10 + (CUR - '0'); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseCharRef: invalid decimal value\n"); + ctxt->wellFormed = 0; + val = 0; + break; + } + NEXT; + } + if (CUR == ';') + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid value\n"); + ctxt->wellFormed = 0; + } + /* + * Check the value IS_CHAR ... + */ + if (IS_CHAR(val)) { + return(val); + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid xmlChar value %d\n", + val); + ctxt->wellFormed = 0; + } + return(0); +} + + +/** + * docbParseDocTypeDecl: + * @ctxt: an SGML parser context + * + * parse a DOCTYPE declaration + * + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? + * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + */ + +static void +docbParseDocTypeDecl(docbParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *ExternalID = NULL; + xmlChar *URI = NULL; + + /* + * We know that '<!DOCTYPE' has been detected. + */ + SKIP(9); + + SKIP_BLANKS; + + /* + * Parse the DOCTYPE name. 
+ */ + name = docbParseName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "docbParseDocTypeDecl : no DOCTYPE name !\n"); + ctxt->wellFormed = 0; + } + /* + * Check that upper(name) == "SGML" !!!!!!!!!!!!! + */ + + SKIP_BLANKS; + + /* + * Check for SystemID and ExternalID + */ + URI = docbParseExternalID(ctxt, &ExternalID); + SKIP_BLANKS; + + /* + * Create or update the document accordingly to the DOCTYPE + * But use the predefined PUBLIC and SYSTEM ID of DocBook XML + */ + if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->internalSubset(ctxt->userData, name, + XML_DOCBOOK_XML_PUBLIC, + XML_DOCBOOK_XML_SYSTEM); + + if (RAW != '>') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseDocTypeDecl : internal subset not handled\n"); + } else { + NEXT; + } + + /* + * Cleanup, since we don't use all those identifiers + */ + if (URI != NULL) xmlFree(URI); + if (ExternalID != NULL) xmlFree(ExternalID); + if (name != NULL) xmlFree(name); +} + +/** + * docbParseAttribute: + * @ctxt: an SGML parser context + * @value: a xmlChar ** used to store the value of the attribute + * + * parse an attribute + * + * [41] Attribute ::= Name Eq AttValue + * + * [25] Eq ::= S? '=' S? + * + * With namespace: + * + * [NS 11] Attribute ::= QName Eq AttValue + * + * Also the case QName == xmlns:??? is handled independently as a namespace + * definition. + * + * Returns the attribute name, and the value in *value. 
+ */ + +static xmlChar * +docbParseAttribute(docbParserCtxtPtr ctxt, xmlChar **value) { + xmlChar *name, *val = NULL; + + *value = NULL; + name = docbParseName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); + ctxt->wellFormed = 0; + return(NULL); + } + + /* + * read the value + */ + SKIP_BLANKS; + if (CUR == '=') { + NEXT; + SKIP_BLANKS; + val = docbParseAttValue(ctxt); + /****** + } else { + * TODO : some attribute must have values, some may not + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->warning(ctxt->userData, + "No value for attribute %s\n", name); */ + } + + *value = val; + return(name); +} + +/** + * docbCheckEncoding: + * @ctxt: an SGML parser context + * @attvalue: the attribute value + * + * Checks an http-equiv attribute from a Meta tag to detect + * the encoding + * If a new encoding is detected the parser is switched to decode + * it and pass UTF8 + */ +static void +docbCheckEncoding(docbParserCtxtPtr ctxt, const xmlChar *attvalue) { + const xmlChar *encoding; + + if ((ctxt == NULL) || (attvalue == NULL)) + return; + + encoding = xmlStrstr(attvalue, BAD_CAST"charset="); + if (encoding == NULL) + encoding = xmlStrstr(attvalue, BAD_CAST"Charset="); + if (encoding == NULL) + encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET="); + if (encoding != NULL) { + encoding += 8; + } else { + encoding = xmlStrstr(attvalue, BAD_CAST"charset ="); + if (encoding == NULL) + encoding = xmlStrstr(attvalue, BAD_CAST"Charset ="); + if (encoding == NULL) + encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET ="); + if (encoding != NULL) + encoding += 9; + } + /* + * Restricted from 2.3.5 */ + if (encoding != NULL) { + xmlCharEncoding enc; + + if (ctxt->input->encoding != NULL) + xmlFree((xmlChar *) ctxt->input->encoding); + ctxt->input->encoding = encoding; + + enc = xmlParseCharEncoding((const char *) encoding); + if (enc == 
XML_CHAR_ENCODING_8859_1) { + ctxt->charset = XML_CHAR_ENCODING_8859_1; + } else if (enc != XML_CHAR_ENCODING_UTF8) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Unsupported encoding %s\n", encoding); + /* xmlFree(encoding); */ + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + } + } +} + +/** + * docbCheckMeta: + * @ctxt: an SGML parser context + * @atts: the attributes values + * + * Checks an attributes from a Meta tag + */ +static void +docbCheckMeta(docbParserCtxtPtr ctxt, const xmlChar **atts) { + int i; + const xmlChar *att, *value; + int http = 0; + const xmlChar *content = NULL; + + if ((ctxt == NULL) || (atts == NULL)) + return; + + i = 0; + att = atts[i++]; + while (att != NULL) { + value = atts[i++]; + if ((value != NULL) && + ((xmlStrEqual(att, BAD_CAST"http-equiv")) || + (xmlStrEqual(att, BAD_CAST"Http-Equiv")) || + (xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) && + ((xmlStrEqual(value, BAD_CAST"Content-Type")) || + (xmlStrEqual(value, BAD_CAST"content-type")) || + (xmlStrEqual(value, BAD_CAST"CONTENT-TYPE")))) + http = 1; + else if ((value != NULL) && + ((xmlStrEqual(att, BAD_CAST"content")) || + (xmlStrEqual(att, BAD_CAST"Content")) || + (xmlStrEqual(att, BAD_CAST"CONTENT")))) + content = value; + att = atts[i++]; + } + if ((http) && (content != NULL)) + docbCheckEncoding(ctxt, content); + +} + +/** + * docbParseStartTag: + * @ctxt: an SGML parser context + * + * parse a start of tag either for rule element or + * EmptyElement. In both case we don't parse the tag closing chars. + * + * [40] STag ::= '<' Name (S Attribute)* S? '>' + * + * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' + * + * With namespace: + * + * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' + * + * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? 
'/>' + * + */ + +static void +docbParseStartTag(docbParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *attname; + xmlChar *attvalue; + const xmlChar **atts = NULL; + int nbatts = 0; + int maxatts = 0; + int meta = 0; + int i; + + if (CUR != '<') return; + NEXT; + + GROW; + name = docbParseSGMLName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseStartTag: invalid element name\n"); + ctxt->wellFormed = 0; + return; + } + if (xmlStrEqual(name, BAD_CAST"meta")) + meta = 1; + + /* + * Check for auto-closure of SGML elements. + */ + docbAutoClose(ctxt, name); + + /* + * Now parse the attributes, it ends up with the ending + * + * (S Attribute)* S? + */ + SKIP_BLANKS; + while ((IS_CHAR(CUR)) && + (CUR != '>') && + ((CUR != '/') || (NXT(1) != '>'))) { + long cons = ctxt->nbChars; + + GROW; + attname = docbParseAttribute(ctxt, &attvalue); + if (attname != NULL) { + + /* + * Well formedness requires at most one declaration of an attribute + */ + for (i = 0; i < nbatts;i += 2) { + if (xmlStrEqual(atts[i], attname)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attribute %s redefined\n", + attname); + ctxt->wellFormed = 0; + xmlFree(attname); + if (attvalue != NULL) + xmlFree(attvalue); + goto failed; + } + } + + /* + * Add the pair to atts + */ + if (atts == NULL) { + maxatts = 10; + atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); + if (atts == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %ld byte failed\n", + maxatts * (long)sizeof(xmlChar *)); + if (name != NULL) xmlFree(name); + return; + } + } else if (nbatts + 4 > maxatts) { + maxatts *= 2; + atts = (const xmlChar **) xmlRealloc((void *)atts, maxatts * sizeof(xmlChar *)); + if (atts == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %ld byte failed\n", + maxatts * (long)sizeof(xmlChar *)); + if (name != NULL) xmlFree(name); + return; + } 
+ } + atts[nbatts++] = attname; + atts[nbatts++] = attvalue; + atts[nbatts] = NULL; + atts[nbatts + 1] = NULL; + } + +failed: + SKIP_BLANKS; + if (cons == ctxt->nbChars) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseStartTag: problem parsing attributes\n"); + ctxt->wellFormed = 0; + break; + } + } + + /* + * Handle specific association to the META tag + */ + if (meta) + docbCheckMeta(ctxt, atts); + + /* + * SAX: Start of Element ! + */ + docbnamePush(ctxt, xmlStrdup(name)); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, name, atts); + + if (atts != NULL) { + for (i = 0;i < nbatts;i++) { + if (atts[i] != NULL) + xmlFree((xmlChar *) atts[i]); + } + xmlFree((void *) atts); + } + if (name != NULL) xmlFree(name); +} + +/** + * docbParseEndTag: + * @ctxt: an SGML parser context + * + * parse an end of tag + * + * [42] ETag ::= '</' Name S? '>' + * + * With namespace + * + * [NS 9] ETag ::= '</' QName S? 
'>' + */ + +static void +docbParseEndTag(docbParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *oldname; + int i; + + if ((CUR != '<') || (NXT(1) != '/')) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "docbParseEndTag: '</' not found\n"); + ctxt->wellFormed = 0; + return; + } + SKIP(2); + + name = docbParseSGMLName(ctxt); + if (name == NULL) { + if (CUR == '>') { + NEXT; + oldname = docbnamePop(ctxt); + if (oldname != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname); +#endif + xmlFree(oldname); +#ifdef DEBUG + } else { + xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n"); +#endif + } + return; + } else + return; + } + + /* + * We should definitely be at the ending "S? '>'" part + */ + SKIP_BLANKS; + if ((!IS_CHAR(CUR)) || (CUR != '>')) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); + ctxt->wellFormed = 0; + } else + NEXT; + + /* + * If the name read is not one of the element in the parsing stack + * then return, it's just an error. + */ + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (xmlStrEqual(name, ctxt->nameTab[i])) break; + } + if (i < 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Unexpected end tag : %s\n", name); + xmlFree(name); + ctxt->wellFormed = 0; + return; + } + + + /* + * Check for auto-closure of SGML elements. + */ + + docbAutoCloseOnClose(ctxt, name); + + /* + * Well formedness constraints, opening and closing must match. + * With the exception that the autoclose may have popped stuff out + * of the stack. 
+ */ + if (((name[0] != '/') || (name[1] != 0)) && + (!xmlStrEqual(name, ctxt->name))) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name); +#endif + if ((ctxt->name != NULL) && + (!xmlStrEqual(ctxt->name, name))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", + name, ctxt->name); + ctxt->wellFormed = 0; + } + } + + /* + * SAX: End of Tag + */ + oldname = ctxt->name; + if (((name[0] == '/') && (name[1] == 0)) || + ((oldname != NULL) && (xmlStrEqual(oldname, name)))) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = docbnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname); +#endif + xmlFree(oldname); +#ifdef DEBUG + } else { + xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name); +#endif + } + } + + if (name != NULL) + xmlFree(name); + + return; +} + + +/** + * docbParseReference: + * @ctxt: an SGML parser context + * + * parse and handle entity references in content, + * this will end-up in a call to character() since this is either a + * CharRef, or a predefined entity. 
+ */ +static void +docbParseReference(docbParserCtxtPtr ctxt) { + docbEntityDescPtr ent; + xmlEntityPtr xent; + xmlChar out[6]; + xmlChar *name; + if (CUR != '&') return; + + if (NXT(1) == '#') { + unsigned int c; + int bits, i = 0; + + c = docbParseCharRef(ctxt); + if (c < 0x80) { out[i++]= c; bits= -6; } + else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + out[i++]= ((c >> bits) & 0x3F) | 0x80; + } + out[i] = 0; + + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, out, i); + } else { + /* + * Lookup the entity in the table. + */ + xent = docbParseEntityRef(ctxt, &name); + if (xent != NULL) { + if (((ctxt->replaceEntities) || (ctxt->loadsubset)) && + ((xent->children == NULL) && + (xent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))) { + /* + * we really need to fetch and parse the external entity + */ + int parse; + xmlNodePtr children = NULL; + + parse = docbParseCtxtExternalEntity(ctxt, + xent->SystemID, xent->ExternalID, &children); + xmlAddChildList((xmlNodePtr) xent, children); + } + if (ctxt->replaceEntities) { + if ((ctxt->node != NULL) && (xent->children != NULL)) { + /* + * Seems we are generating the DOM content, do + * a simple tree copy + */ + xmlNodePtr new; + new = xmlCopyNodeList(xent->children); + + xmlAddChildList(ctxt->node, new); + /* + * This is to avoid a nasty side effect, see + * characters() in SAX.c + */ + ctxt->nodemem = 0; + ctxt->nodelen = 0; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Create a node. 
+ */ + ctxt->sax->reference(ctxt->userData, xent->name); + } + } + } else if (name != NULL) { + ent = docbEntityLookup(name); + if ((ent == NULL) || (ent->value <= 0)) { + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) { + ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); + ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name)); + /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */ + } + } else { + unsigned int c; + int bits, i = 0; + + c = ent->value; + if (c < 0x80) + { out[i++]= c; bits= -6; } + else if (c < 0x800) + { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + out[i++]= ((c >> bits) & 0x3F) | 0x80; + } + out[i] = 0; + + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, out, i); + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); + return; + } + if (name != NULL) + xmlFree(name); + } +} + +/** + * docbParseContent: + * @ctxt: an SGML parser context + * @name: the node name + * + * Parse a content: comment, sub-element, reference or text. + * + */ +static void +docbParseContent(docbParserCtxtPtr ctxt) +{ + xmlChar *currentNode; + int depth; + + currentNode = xmlStrdup(ctxt->name); + depth = ctxt->nameNr; + while (1) { + long cons = ctxt->nbChars; + + GROW; + /* + * Our tag or one of it's parent or children is ending. 
+ */ + if ((CUR == '<') && (NXT(1) == '/')) { + docbParseEndTag(ctxt); + if (currentNode != NULL) + xmlFree(currentNode); + return; + } + + /* + * Has this node been popped out during parsing of + * the next element + */ + if ((!xmlStrEqual(currentNode, ctxt->name)) && + (depth >= ctxt->nameNr)) { + if (currentNode != NULL) + xmlFree(currentNode); + return; + } + + /* + * Sometimes DOCTYPE arrives in the middle of the document + */ + if ((CUR == '<') && (NXT(1) == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Misplaced DOCTYPE declaration\n"); + ctxt->wellFormed = 0; + docbParseDocTypeDecl(ctxt); + } + + /* + * First case : a comment + */ + if ((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) { + docbParseComment(ctxt); + } + + /* + * Second case : a PI + */ + else if ((RAW == '<') && (NXT(1) == '?')) { + docbParsePI(ctxt); + } + + /* + * Third case : a sub-element. + */ + else if (CUR == '<') { + docbParseElement(ctxt); + } + + /* + * Fourth case : a reference. If if has not been resolved, + * parsing returns it's Name, create the node + */ + else if (CUR == '&') { + docbParseReference(ctxt); + } + + /* + * Fifth : end of the resource + */ + else if (CUR == 0) { + docbAutoClose(ctxt, NULL); + if (ctxt->nameNr == 0) + break; + } + + /* + * Last case, text. Note that References are handled directly. 
+ */ + else { + docbParseCharData(ctxt); + } + + if (cons == ctxt->nbChars) { + if (ctxt->node != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + } + break; + } + + GROW; + } + if (currentNode != NULL) + xmlFree(currentNode); +} + +/** + * docbParseElement: + * @ctxt: an SGML parser context + * + * parse an SGML element, this is highly recursive + * + * [39] element ::= EmptyElemTag | STag content ETag + * + * [41] Attribute ::= Name Eq AttValue + */ + +static void +docbParseElement(docbParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *currentNode = NULL; + docbElemDescPtr info; + docbParserNodeInfo node_info; + xmlChar *oldname; + int depth = ctxt->nameNr; + + /* Capture start position */ + if (ctxt->record_info) { + node_info.begin_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.begin_line = ctxt->input->line; + } + + oldname = xmlStrdup(ctxt->name); + docbParseStartTag(ctxt); + name = ctxt->name; +#ifdef DEBUG + if (oldname == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element %s\n", name); + else if (name == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element failed, was %s\n", oldname); + else + xmlGenericError(xmlGenericErrorContext, + "Start of element %s, was %s\n", name, oldname); +#endif + if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) || + (name == NULL)) { + if (CUR == '>') + NEXT; + if (oldname != NULL) + xmlFree(oldname); + return; + } + if (oldname != NULL) + xmlFree(oldname); + + /* + * Lookup the info for that element. 
+ */ + info = docbTagLookup(name); + if (info == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Tag %s unknown\n", + name); + ctxt->wellFormed = 0; + } else if (info->depr) { +/*************************** + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n", + name); + ***************************/ + } + + /* + * Check for an Empty Element labeled the XML/SGML way + */ + if ((CUR == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + return; + } + + if (CUR == '>') { + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + + /* + * end of parsing of this node. 
+ */ + if (xmlStrEqual(name, ctxt->name)) { + nodePop(ctxt); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + } + + /* + * Capture end position and add node + */ + if ( currentNode != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ctxt->node; + xmlParserAddNodeInfo(ctxt, &node_info); + } + return; + } + + /* + * Check for an Empty Element from DTD definition + */ + if ((info != NULL) && (info->empty)) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + return; + } + + /* + * Parse the content of the element: + */ + currentNode = xmlStrdup(ctxt->name); + depth = ctxt->nameNr; + while (IS_CHAR(CUR)) { + docbParseContent(ctxt); + if (ctxt->nameNr < depth) break; + } + + if (!IS_CHAR(CUR)) { + /************ + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Premature end of data in tag %s\n", currentNode); + ctxt->wellFormed = 0; + *************/ + + /* + * end of parsing of this node. 
+ */ + nodePop(ctxt); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + if (currentNode != NULL) + xmlFree(currentNode); + return; + } + + /* + * Capture end position and add node + */ + if ( currentNode != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ctxt->node; + xmlParserAddNodeInfo(ctxt, &node_info); + } + if (currentNode != NULL) + xmlFree(currentNode); +} + +/** + * docbParseEntityDecl: + * @ctxt: an SGML parser context + * + * parse <!ENTITY declarations + * + */ + +static void +docbParseEntityDecl(xmlParserCtxtPtr ctxt) { + xmlChar *name = NULL; + xmlChar *value = NULL; + xmlChar *URI = NULL, *literal = NULL; + xmlChar *ndata = NULL; + int isParameter = 0; + xmlChar *orig = NULL; + + GROW; + if ((RAW == '<') && (NXT(1) == '!') && + (UPP(2) == 'E') && (UPP(3) == 'N') && + (UPP(4) == 'T') && (UPP(5) == 'I') && + (UPP(6) == 'T') && (UPP(7) == 'Y')) { + xmlParserInputPtr input = ctxt->input; + ctxt->instate = XML_PARSER_ENTITY_DECL; + SHRINK; + SKIP(8); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '<!ENTITY'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + + if (RAW == '%') { + NEXT; + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '%'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + isParameter = 1; + } + + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the entity name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + + /* + * handle the various case of definitions... + */ + if (isParameter) { + if ((RAW == '"') || (RAW == '\'')) { + value = xmlParseEntityValue(ctxt, &orig); + if (value) { + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_INTERNAL_PARAMETER_ENTITY, + NULL, NULL, value); + } + } else { + URI = xmlParseExternalID(ctxt, &literal, 1); + if ((URI == NULL) && (literal == NULL)) { + ctxt->errNo = XML_ERR_VALUE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity value required\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (URI) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *) URI); + if (uri == NULL) { + ctxt->errNo = XML_ERR_INVALID_URI; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Invalid URI: %s\n", URI); + ctxt->wellFormed = 0; + } else { + if (uri->fragment != NULL) { + ctxt->errNo = XML_ERR_URI_FRAGMENT; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Fragment not allowed: %s\n", URI); + ctxt->wellFormed = 0; + } else { + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_EXTERNAL_PARAMETER_ENTITY, + literal, URI, NULL); + } + xmlFreeURI(uri); + } + } + } + } else { 
+ if ((RAW == '"') || (RAW == '\'')) { + value = xmlParseEntityValue(ctxt, &orig); + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_INTERNAL_GENERAL_ENTITY, + NULL, NULL, value); + } else { + URI = xmlParseExternalID(ctxt, &literal, 1); + if ((URI == NULL) && (literal == NULL)) { + ctxt->errNo = XML_ERR_VALUE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity value required\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (URI) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *)URI); + if (uri == NULL) { + ctxt->errNo = XML_ERR_INVALID_URI; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Invalid URI: %s\n", URI); + ctxt->wellFormed = 0; + } else { + if (uri->fragment != NULL) { + ctxt->errNo = XML_ERR_URI_FRAGMENT; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Fragment not allowed: %s\n", URI); + ctxt->wellFormed = 0; + } + xmlFreeURI(uri); + } + } + if ((RAW != '>') && (!IS_BLANK(CUR))) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required before content model\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + + /* + * SGML specific: here we can get the content model + */ + if (RAW != '>') { + xmlChar *contmod; + + contmod = xmlParseName(ctxt); + + if (contmod == NULL) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Could not parse entity content model\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (xmlStrEqual(contmod, BAD_CAST"NDATA")) { + if 
(!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'NDATA'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + ndata = xmlParseName(ctxt); + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->unparsedEntityDecl != NULL)) { + ctxt->sax->unparsedEntityDecl(ctxt->userData, + name, literal, URI, ndata); + } + } else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) { + if ((ctxt->sax != NULL) && + (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "SUBDOC entities are not supported\n"); + SKIP_BLANKS; + ndata = xmlParseName(ctxt); + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->unparsedEntityDecl != NULL)) { + ctxt->sax->unparsedEntityDecl(ctxt->userData, + name, literal, URI, ndata); + } + } else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) { + if ((ctxt->sax != NULL) && + (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "CDATA entities are not supported\n"); + SKIP_BLANKS; + ndata = xmlParseName(ctxt); + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->unparsedEntityDecl != NULL)) { + ctxt->sax->unparsedEntityDecl(ctxt->userData, + name, literal, URI, ndata); + } + } + xmlFree(contmod); + } + } else { + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_EXTERNAL_GENERAL_PARSED_ENTITY, + literal, URI, NULL); + } + } + } + SKIP_BLANKS; + if (RAW != '>') { + ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseEntityDecl: entity %s not terminated\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, +"Entity declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; + } + if (orig != NULL) { + /* + * Ugly mechanism to save the raw entity value. + */ + xmlEntityPtr cur = NULL; + + if (isParameter) { + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + cur = ctxt->sax->getParameterEntity(ctxt->userData, name); + } else { + if ((ctxt->sax != NULL) && + (ctxt->sax->getEntity != NULL)) + cur = ctxt->sax->getEntity(ctxt->userData, name); + } + if (cur != NULL) { + if (cur->orig != NULL) + xmlFree(orig); + else + cur->orig = orig; + } else + xmlFree(orig); + } + if (name != NULL) xmlFree(name); + if (value != NULL) xmlFree(value); + if (URI != NULL) xmlFree(URI); + if (literal != NULL) xmlFree(literal); + if (ndata != NULL) xmlFree(ndata); + } +} + +/** + * docbParseMarkupDecl: + * @ctxt: an SGML parser context + * + * parse Markup declarations + * + * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | + * NotationDecl | PI | Comment + */ +static void +docbParseMarkupDecl(xmlParserCtxtPtr ctxt) { + GROW; + xmlParseElementDecl(ctxt); + xmlParseAttributeListDecl(ctxt); + docbParseEntityDecl(ctxt); + xmlParseNotationDecl(ctxt); + docbParsePI(ctxt); + xmlParseComment(ctxt); + /* + * This is only for internal subset. On external entities, + * the replacement is done before parsing stage + */ + if ((ctxt->external == 0) && (ctxt->inputNr == 1)) + xmlParsePEReference(ctxt); + ctxt->instate = XML_PARSER_DTD; +} + +/** + * docbParseInternalSubset: + * @ctxt: an SGML parser context + * + * parse the internal subset declaration + * + * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + */ + +static void +docbParseInternalSubset(xmlParserCtxtPtr ctxt) { + /* + * Is there any DTD definition ? 
+ */ + if (RAW == '[') { + ctxt->instate = XML_PARSER_DTD; + NEXT; + /* + * Parse the succession of Markup declarations and + * PEReferences. + * Subsequence (markupdecl | PEReference | S)* + */ + while (RAW != ']') { + const xmlChar *check = CUR_PTR; + int cons = ctxt->input->consumed; + + SKIP_BLANKS; + docbParseMarkupDecl(ctxt); + xmlParsePEReference(ctxt); + + /* + * Pop-up of finished entities. + */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + + if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "docbParseInternalSubset: error detected in Markup declaration\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + } + } + if (RAW == ']') { + NEXT; + SKIP_BLANKS; + } + } + + /* + * We should be at the end of the DOCTYPE declaration. + */ + if (RAW != '>') { + ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; +} + +/** + * docbParseMisc: + * @ctxt: an XML parser context + * + * parse an XML Misc* optional field. + * + * [27] Misc ::= Comment | PI | S + */ + +static void +docbParseMisc(xmlParserCtxtPtr ctxt) { + while (((RAW == '<') && (NXT(1) == '?')) || + ((RAW == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) || + IS_BLANK(CUR)) { + if ((RAW == '<') && (NXT(1) == '?')) { + docbParsePI(ctxt); + } else if (IS_BLANK(CUR)) { + NEXT; + } else + xmlParseComment(ctxt); + } +} + +/** + * docbParseDocument: + * @ctxt: an SGML parser context + * + * parse an SGML document (and build a tree if using the standard SAX + * interface). + * + * Returns 0, -1 in case of error. the parser context is augmented + * as a result of the parsing. 
+ */ + +int +docbParseDocument(docbParserCtxtPtr ctxt) { + xmlChar start[4]; + xmlCharEncoding enc; + xmlDtdPtr dtd; + + docbDefaultSAXHandlerInit(); + ctxt->html = 2; + + GROW; + /* + * SAX: beginning of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. + */ + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + + /* + * Wipe out everything which is before the first '<' + */ + SKIP_BLANKS; + if (CUR == 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Document is empty\n"); + ctxt->wellFormed = 0; + } + + if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + + + /* + * The Misc part of the Prolog + */ + GROW; + docbParseMisc(ctxt); + + /* + * Then possibly doc type declaration(s) and more Misc + * (doctypedecl Misc*)? + */ + GROW; + if ((RAW == '<') && (NXT(1) == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + + ctxt->inSubset = 1; + docbParseDocTypeDecl(ctxt); + if (RAW == '[') { + ctxt->instate = XML_PARSER_DTD; + docbParseInternalSubset(ctxt); + } + + /* + * Create and update the external subset. 
+ */ + ctxt->inSubset = 2; + if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->internalSubset(ctxt->userData, ctxt->intSubName, + ctxt->extSubSystem, ctxt->extSubURI); + ctxt->inSubset = 0; + + + ctxt->instate = XML_PARSER_PROLOG; + docbParseMisc(ctxt); + } + + /* + * Time to start parsing the tree itself + */ + docbParseContent(ctxt); + + /* + * autoclose + */ + if (CUR == 0) + docbAutoClose(ctxt, NULL); + + + /* + * SAX: end of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + + if (ctxt->myDoc != NULL) { + dtd = ctxt->myDoc->intSubset; + ctxt->myDoc->standalone = -1; + if (dtd == NULL) + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", + BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN", + BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"); + } + if (! ctxt->wellFormed) return(-1); + return(0); +} + + +/************************************************************************ + * * + * Parser contexts handling * + * * + ************************************************************************/ + +/** + * docbInitParserCtxt: + * @ctxt: an SGML parser context + * + * Initialize a parser context + */ + +static void +docbInitParserCtxt(docbParserCtxtPtr ctxt) +{ + docbSAXHandler *sax; + + if (ctxt == NULL) return; + memset(ctxt, 0, sizeof(docbParserCtxt)); + + sax = (docbSAXHandler *) xmlMalloc(sizeof(docbSAXHandler)); + if (sax == NULL) { + xmlGenericError(xmlGenericErrorContext, + "docbInitParserCtxt: out of memory\n"); + } + memset(sax, 0, sizeof(docbSAXHandler)); + + /* Allocate the Input stack */ + ctxt->inputTab = (docbParserInputPtr *) + xmlMalloc(5 * sizeof(docbParserInputPtr)); + if (ctxt->inputTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "docbInitParserCtxt: out of memory\n"); + } + ctxt->inputNr = 0; + ctxt->inputMax = 5; + ctxt->input = NULL; + ctxt->version = NULL; + ctxt->encoding = 
NULL; + ctxt->standalone = -1; + ctxt->instate = XML_PARSER_START; + + /* Allocate the Node stack */ + ctxt->nodeTab = (docbNodePtr *) xmlMalloc(10 * sizeof(docbNodePtr)); + ctxt->nodeNr = 0; + ctxt->nodeMax = 10; + ctxt->node = NULL; + + /* Allocate the Name stack */ + ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); + ctxt->nameNr = 0; + ctxt->nameMax = 10; + ctxt->name = NULL; + + if (sax == NULL) ctxt->sax = &docbDefaultSAXHandler; + else { + ctxt->sax = sax; + memcpy(sax, &docbDefaultSAXHandler, sizeof(docbSAXHandler)); + } + ctxt->userData = ctxt; + ctxt->myDoc = NULL; + ctxt->wellFormed = 1; + ctxt->linenumbers = xmlLineNumbersDefaultValue; + ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; + ctxt->html = 2; + ctxt->record_info = 0; + ctxt->validate = 0; + ctxt->nbChars = 0; + ctxt->checkIndex = 0; + xmlInitNodeInfoSeq(&ctxt->node_seq); +} + +/** + * docbFreeParserCtxt: + * @ctxt: an SGML parser context + * + * Free all the memory used by a parser context. However the parsed + * document in ctxt->myDoc is not freed. + */ + +void +docbFreeParserCtxt(docbParserCtxtPtr ctxt) +{ + xmlFreeParserCtxt(ctxt); +} + +/** + * docbCreateDocParserCtxt: + * @cur: a pointer to an array of xmlChar + * @encoding: the SGML document encoding, or NULL + * + * Create a parser context for an SGML document. 
+ * + * Returns the new parser context or NULL + */ +static docbParserCtxtPtr +docbCreateDocParserCtxt(xmlChar *cur, const char *encoding ATTRIBUTE_UNUSED) { + docbParserCtxtPtr ctxt; + docbParserInputPtr input; + /* sgmlCharEncoding enc; */ + + ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt)); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed"); + return(NULL); + } + docbInitParserCtxt(ctxt); + input = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput)); + if (input == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed"); + xmlFree(ctxt); + return(NULL); + } + memset(input, 0, sizeof(docbParserInput)); + + input->line = 1; + input->col = 1; + input->base = cur; + input->cur = cur; + + inputPush(ctxt, input); + return(ctxt); +} + +/************************************************************************ + * * + * Progressive parsing interfaces * + * * + ************************************************************************/ + +/** + * docbParseLookupSequence: + * @ctxt: an SGML parser context + * @first: the first char to lookup + * @next: the next char to lookup or zero + * @third: the next char to lookup or zero + * + * Try to find if a sequence (first, next, third) or just (first next) or + * (first) is available in the input stream. + * This function has a side effect of (possibly) incrementing ctxt->checkIndex + * to avoid rescanning sequences of bytes, it DOES change the state of the + * parser, do not use liberally. + * This is basically similar to xmlParseLookupSequence() + * + * Returns the index to the current parsing point if the full sequence + * is available, -1 otherwise. 
+ */ +static int +docbParseLookupSequence(docbParserCtxtPtr ctxt, xmlChar first, + xmlChar next, xmlChar third) { + int base, len; + docbParserInputPtr in; + const xmlChar *buf; + + in = ctxt->input; + if (in == NULL) return(-1); + base = in->cur - in->base; + if (base < 0) return(-1); + if (ctxt->checkIndex > base) + base = ctxt->checkIndex; + if (in->buf == NULL) { + buf = in->base; + len = in->length; + } else { + buf = in->buf->buffer->content; + len = in->buf->buffer->use; + } + /* take into account the sequence length */ + if (third) len -= 2; + else if (next) len --; + for (;base < len;base++) { + if (buf[base] == first) { + if (third != 0) { + if ((buf[base + 1] != next) || + (buf[base + 2] != third)) continue; + } else if (next != 0) { + if (buf[base + 1] != next) continue; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c' found at %d\n", + first, base); + else if (third == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c' found at %d\n", + first, next, base); + else + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c%c' found at %d\n", + first, next, third, base); +#endif + return(base - (in->cur - in->base)); + } + } + ctxt->checkIndex = base; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c' failed\n", first); + else if (third == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c' failed\n", first, next); + else + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c%c' failed\n", first, next, third); +#endif + return(-1); +} + +/** + * docbParseTryOrFinish: + * @ctxt: an SGML parser context + * @terminate: last chunk indicator + * + * Try to progress on parsing + * + * Returns zero if no parsing was possible + */ +static int +docbParseTryOrFinish(docbParserCtxtPtr ctxt, int terminate) { + int ret = 0; + docbParserInputPtr in; + int avail = 0; + xmlChar cur, next; + +#ifdef 
DEBUG_PUSH + switch (ctxt->instate) { + case XML_PARSER_EOF: + xmlGenericError(xmlGenericErrorContext, + "HPP: try EOF\n"); break; + case XML_PARSER_START: + xmlGenericError(xmlGenericErrorContext, + "HPP: try START\n"); break; + case XML_PARSER_MISC: + xmlGenericError(xmlGenericErrorContext, + "HPP: try MISC\n");break; + case XML_PARSER_COMMENT: + xmlGenericError(xmlGenericErrorContext, + "HPP: try COMMENT\n");break; + case XML_PARSER_PROLOG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try PROLOG\n");break; + case XML_PARSER_START_TAG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try START_TAG\n");break; + case XML_PARSER_CONTENT: + xmlGenericError(xmlGenericErrorContext, + "HPP: try CONTENT\n");break; + case XML_PARSER_CDATA_SECTION: + xmlGenericError(xmlGenericErrorContext, + "HPP: try CDATA_SECTION\n");break; + case XML_PARSER_END_TAG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try END_TAG\n");break; + case XML_PARSER_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, + "HPP: try ENTITY_DECL\n");break; + case XML_PARSER_ENTITY_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: try ENTITY_VALUE\n");break; + case XML_PARSER_ATTRIBUTE_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: try ATTRIBUTE_VALUE\n");break; + case XML_PARSER_DTD: + xmlGenericError(xmlGenericErrorContext, + "HPP: try DTD\n");break; + case XML_PARSER_EPILOG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try EPILOG\n");break; + case XML_PARSER_PI: + xmlGenericError(xmlGenericErrorContext, + "HPP: try PI\n");break; + } +#endif + + while (1) { + + in = ctxt->input; + if (in == NULL) break; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if ((avail == 0) && (terminate)) { + docbAutoClose(ctxt, NULL); + if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { + /* + * SAX: end of the document processing. 
+ */ + ctxt->instate = XML_PARSER_EOF; + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + } + if (avail < 1) + goto done; + switch (ctxt->instate) { + case XML_PARSER_EOF: + /* + * Document parsing is done ! + */ + goto done; + case XML_PARSER_START: + /* + * Very first chars read from the document flow. + */ + cur = in->cur[0]; + if (IS_BLANK(cur)) { + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + } + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + if ((ctxt->sax) && (ctxt->sax->startDocument) && + (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing internal subset\n"); +#endif + docbParseDocTypeDecl(ctxt); + ctxt->instate = XML_PARSER_PROLOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering PROLOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_MISC; + } +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering MISC\n"); +#endif + break; + case XML_PARSER_MISC: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + 
xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + docbParseComment(ctxt); + ctxt->instate = XML_PARSER_MISC; + } else if ((cur == '<') && (next == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing internal subset\n"); +#endif + docbParseDocTypeDecl(ctxt); + ctxt->instate = XML_PARSER_PROLOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering PROLOG\n"); +#endif + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + } + break; + case XML_PARSER_PROLOG: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + docbParseComment(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + } + break; + case XML_PARSER_EPILOG: + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 1) + goto done; + cur = in->cur[0]; + if (IS_BLANK(cur)) { + docbParseCharData(ctxt); + 
goto done; + } + if (avail < 2) + goto done; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + docbParseComment(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->errNo = XML_ERR_DOCUMENT_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + break; + case XML_PARSER_START_TAG: { + xmlChar *name, *oldname; + int depth = ctxt->nameNr; + docbElemDescPtr info; + + if (avail < 2) + goto done; + cur = in->cur[0]; + if (cur != '<') { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + if ((!terminate) && + (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + + oldname = xmlStrdup(ctxt->name); + docbParseStartTag(ctxt); + name = ctxt->name; +#ifdef DEBUG + if (oldname == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element %s\n", name); + else if (name == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element failed, was %s\n", + oldname); + else + xmlGenericError(xmlGenericErrorContext, + "Start of element %s, was %s\n", + name, oldname); +#endif + if (((depth == ctxt->nameNr) && + (xmlStrEqual(oldname, ctxt->name))) || + (name == NULL)) { + if (CUR == '>') + NEXT; + if (oldname != NULL) + xmlFree(oldname); + break; + } + if (oldname != NULL) + 
xmlFree(oldname); + + /* + * Lookup the info for that element. + */ + info = docbTagLookup(name); + if (info == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Tag %s unknown\n", + name); + ctxt->wellFormed = 0; + } else if (info->depr) { + /*************************** + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Tag %s is deprecated\n", + name); + ***************************/ + } + + /* + * Check for an Empty Element labeled the XML/SGML way + */ + if ((CUR == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", + oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + + if (CUR == '>') { + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + + /* + * end of parsing of this node. 
+ */ + if (xmlStrEqual(name, ctxt->name)) { + nodePop(ctxt); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext, + "End of start tag problem: popping out %s\n", oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + } + + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + + /* + * Check for an Empty Element from DTD definition + */ + if ((info != NULL) && (info->empty)) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = docbnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + } + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + case XML_PARSER_CONTENT: { + long cons; + /* + * Handle preparsed entities and charRef + */ + if (ctxt->token != 0) { + xmlChar chr[2] = { 0 , 0 } ; + + chr[0] = (xmlChar) ctxt->token; + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, chr, 1); + ctxt->token = 0; + ctxt->checkIndex = 0; + } + if ((avail == 1) && (terminate)) { + cur = in->cur[0]; + if ((cur != '<') && (cur != '&')) { + if (ctxt->sax != NULL) { + if (IS_BLANK(cur)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace( + ctxt->userData, &cur, 1); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters( + ctxt->userData, &cur, 1); + } + } + ctxt->token = 0; + ctxt->checkIndex = 0; + NEXT; + } + break; + } + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + cons = ctxt->nbChars; + /* + * Sometimes DOCTYPE arrives in the middle of the document + */ + if ((cur == '<') && (next == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + 
(UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Misplaced DOCTYPE declaration\n"); + ctxt->wellFormed = 0; + docbParseDocTypeDecl(ctxt); + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + docbParseComment(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } else if ((cur == '<') && (next == '!') && (avail < 4)) { + goto done; + } else if ((cur == '<') && (next == '/')) { + ctxt->instate = XML_PARSER_END_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering END_TAG\n"); +#endif + break; + } else if (cur == '<') { + ctxt->instate = XML_PARSER_START_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + break; + } else if (cur == '&') { + if ((!terminate) && + (docbParseLookupSequence(ctxt, ';', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Reference\n"); +#endif + /* TODO: check generation of subtrees if noent !!! */ + docbParseReference(ctxt); + } else { + /* TODO Avoid the extra copy, handle directly !!!!!! 
*/ + /* + * Goal of the following test is: + * - minimize calls to the SAX 'character' callback + * when they are mergeable + */ + if ((ctxt->inputNr == 1) && + (avail < DOCB_PARSER_BIG_BUFFER_SIZE)) { + if ((!terminate) && + (docbParseLookupSequence(ctxt, '<', 0, 0) < 0)) + goto done; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing char data\n"); +#endif + docbParseCharData(ctxt); + } + if (cons == ctxt->nbChars) { + if (ctxt->node != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + NEXT; + } + break; + } + + break; + } + case XML_PARSER_END_TAG: + if (avail < 2) + goto done; + if ((!terminate) && + (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + docbParseEndTag(ctxt); + if (ctxt->nameNr == 0) { + ctxt->instate = XML_PARSER_EPILOG; + } else { + ctxt->instate = XML_PARSER_CONTENT; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_CDATA_SECTION: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == CDATA\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_DTD: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == DTD\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_COMMENT: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == COMMENT\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_PI: + 
xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == PI\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == ENTITY_DECL\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_ENTITY_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == ENTITY_VALUE\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering DTD\n"); +#endif + break; + case XML_PARSER_ATTRIBUTE_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == ATTRIBUTE_VALUE\n"); + ctxt->instate = XML_PARSER_START_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + break; + case XML_PARSER_SYSTEM_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + + case XML_PARSER_IGNORE: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == XML_PARSER_IGNORE\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_PUBLIC_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == XML_PARSER_LITERAL\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + 
xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + } +done: + if ((avail == 0) && (terminate)) { + docbAutoClose(ctxt, NULL); + if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { + /* + * SAX: end of the document processing. + */ + ctxt->instate = XML_PARSER_EOF; + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + } + if ((ctxt->myDoc != NULL) && + ((terminate) || (ctxt->instate == XML_PARSER_EOF) || + (ctxt->instate == XML_PARSER_EPILOG))) { + xmlDtdPtr dtd; + dtd = ctxt->myDoc->intSubset; + if (dtd == NULL) + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", + BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN", + BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"); + } +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); +#endif + return(ret); +} + +/** + * docbParseChunk: + * @ctxt: an XML parser context + * @chunk: an char array + * @size: the size in byte of the chunk + * @terminate: last chunk indicator + * + * Parse a Chunk of memory + * + * Returns zero if no error, the xmlParserErrors otherwise. 
+ */ +int +docbParseChunk(docbParserCtxtPtr ctxt, const char *chunk, int size, + int terminate) { + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; + + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + cur; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); +#endif + + if ((terminate) || (ctxt->input->buf->buffer->use > 80)) + docbParseTryOrFinish(ctxt, terminate); + } else if (ctxt->instate != XML_PARSER_EOF) { + xmlParserInputBufferPush(ctxt->input->buf, 0, ""); + docbParseTryOrFinish(ctxt, terminate); + } + if (terminate) { + if ((ctxt->instate != XML_PARSER_EOF) && + (ctxt->instate != XML_PARSER_EPILOG) && + (ctxt->instate != XML_PARSER_MISC)) { + ctxt->errNo = XML_ERR_DOCUMENT_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + } + if (ctxt->instate != XML_PARSER_EOF) { + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + ctxt->instate = XML_PARSER_EOF; + } + return((xmlParserErrors) ctxt->errNo); +} + +/************************************************************************ + * * + * User entry points * + * * + ************************************************************************/ + +/** + * docbCreatePushParserCtxt: + * @sax: a SAX handler + * @user_data: The user data returned on SAX callbacks + * @chunk: a pointer to an array of chars + * @size: number of chars in the array + * @filename: an optional file name or URI + * @enc: an optional encoding + * + * Create a parser context for using the DocBook SGML parser in push mode + * To allow 
content encoding detection, @size should be >= 4 + * The value of @filename is used for fetching external entities + * and error/warning reports. + * + * Returns the new parser context or NULL + */ +docbParserCtxtPtr +docbCreatePushParserCtxt(docbSAXHandlerPtr sax, void *user_data, + const char *chunk, int size, const char *filename, + xmlCharEncoding enc) { + docbParserCtxtPtr ctxt; + docbParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + + buf = xmlAllocParserInputBuffer(enc); + if (buf == NULL) return(NULL); + + ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt)); + if (ctxt == NULL) { + xmlFree(buf); + return(NULL); + } + memset(ctxt, 0, sizeof(docbParserCtxt)); + docbInitParserCtxt(ctxt); + if (sax != NULL) { + if (ctxt->sax != &docbDefaultSAXHandler) + xmlFree(ctxt->sax); + ctxt->sax = (docbSAXHandlerPtr) xmlMalloc(sizeof(docbSAXHandler)); + if (ctxt->sax == NULL) { + xmlFree(buf); + xmlFree(ctxt); + return(NULL); + } + memcpy(ctxt->sax, sax, sizeof(docbSAXHandler)); + if (user_data != NULL) + ctxt->userData = user_data; + } + if (filename == NULL) { + ctxt->directory = NULL; + } else { + ctxt->directory = xmlParserGetDirectory(filename); + } + + inputStream = docbNewInputStream(ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + if (filename == NULL) + inputStream->filename = NULL; + else + inputStream->filename = xmlMemStrdup(filename); + inputStream->buf = buf; + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + + inputPush(ctxt, inputStream); + + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL)) { + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); +#endif + } + + return(ctxt); +} + +/** + * docbSAXParseDoc: + * @cur: a pointer to an array of xmlChar + * @encoding: a free form C string describing the SGML 
document encoding, or NULL + * @sax: the SAX handler block + * @userData: if using SAX, this pointer will be provided on callbacks. + * + * parse an SGML in-memory document and build a tree. + * It use the given SAX function block to handle the parsing callback. + * If sax is NULL, fallback to the default DOM tree building routines. + * + * Returns the resulting document tree + */ + +docbDocPtr +docbSAXParseDoc(xmlChar *cur, const char *encoding, docbSAXHandlerPtr sax, void *userData) { + docbDocPtr ret; + docbParserCtxtPtr ctxt; + + if (cur == NULL) return(NULL); + + + ctxt = docbCreateDocParserCtxt(cur, encoding); + if (ctxt == NULL) return(NULL); + if (sax != NULL) { + ctxt->sax = sax; + ctxt->userData = userData; + } + + docbParseDocument(ctxt); + ret = ctxt->myDoc; + if (sax != NULL) { + ctxt->sax = NULL; + ctxt->userData = NULL; + } + docbFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * docbParseDoc: + * @cur: a pointer to an array of xmlChar + * @encoding: a free form C string describing the SGML document encoding, or NULL + * + * parse an SGML in-memory document and build a tree. + * + * Returns the resulting document tree + */ + +docbDocPtr +docbParseDoc(xmlChar *cur, const char *encoding) { + return(docbSAXParseDoc(cur, encoding, NULL, NULL)); +} + + +/** + * docbCreateFileParserCtxt: + * @filename: the filename + * @encoding: the SGML document encoding, or NULL + * + * Create a parser context for a file content. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. 
+ * + * Returns the new parser context or NULL + */ +docbParserCtxtPtr +docbCreateFileParserCtxt(const char *filename, + const char *encoding ATTRIBUTE_UNUSED) +{ + docbParserCtxtPtr ctxt; + docbParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + /* sgmlCharEncoding enc; */ + + buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); + if (buf == NULL) return(NULL); + + ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt)); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed"); + return(NULL); + } + memset(ctxt, 0, sizeof(docbParserCtxt)); + docbInitParserCtxt(ctxt); + inputStream = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput)); + if (inputStream == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed"); + xmlFree(ctxt); + return(NULL); + } + memset(inputStream, 0, sizeof(docbParserInput)); + + inputStream->filename = (char *) + xmlNormalizeWindowsPath((const xmlChar *)filename); + inputStream->line = 1; + inputStream->col = 1; + inputStream->buf = buf; + inputStream->directory = NULL; + + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->free = NULL; + + inputPush(ctxt, inputStream); + return(ctxt); +} + +/** + * docbSAXParseFile: + * @filename: the filename + * @encoding: a free form C string describing the SGML document encoding, or NULL + * @sax: the SAX handler block + * @userData: if using SAX, this pointer will be provided on callbacks. + * + * parse an SGML file and build a tree. Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * It use the given SAX function block to handle the parsing callback. + * If sax is NULL, fallback to the default DOM tree building routines. 
+ * + * Returns the resulting document tree + */ + +docbDocPtr +docbSAXParseFile(const char *filename, const char *encoding, docbSAXHandlerPtr sax, + void *userData) { + docbDocPtr ret; + docbParserCtxtPtr ctxt; + docbSAXHandlerPtr oldsax = NULL; + + ctxt = docbCreateFileParserCtxt(filename, encoding); + if (ctxt == NULL) return(NULL); + if (sax != NULL) { + oldsax = ctxt->sax; + ctxt->sax = sax; + ctxt->userData = userData; + } + + docbParseDocument(ctxt); + + ret = ctxt->myDoc; + if (sax != NULL) { + ctxt->sax = oldsax; + ctxt->userData = NULL; + } + docbFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * docbParseFile: + * @filename: the filename + * @encoding: a free form C string describing document encoding, or NULL + * + * parse a Docbook SGML file and build a tree. Automatic support for + * ZLIB/Compress compressed document is provided by default if found + * at compile-time. + * + * Returns the resulting document tree + */ + +docbDocPtr +docbParseFile(const char *filename, const char *encoding) { + return(docbSAXParseFile(filename, encoding, NULL, NULL)); +} + +#endif /* LIBXML_DOCB_ENABLED */ diff --git a/bundle/libxml/HTMLparser.c b/bundle/libxml/HTMLparser.c new file mode 100644 index 0000000000..7db3e9e3a8 --- /dev/null +++ b/bundle/libxml/HTMLparser.c @@ -0,0 +1,5013 @@ +/* + * HTMLparser.c : an HTML 4.0 non-verifying parser + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_HTML_ENABLED + +#include <string.h> +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlerror.h> +#include <libxml/HTMLparser.h> +#include <libxml/HTMLtree.h> +#include <libxml/entities.h> +#include <libxml/encoding.h> +#include <libxml/valid.h> +#include <libxml/xmlIO.h> +#include <libxml/globals.h> + +#define HTML_MAX_NAMELEN 1000 +#define HTML_PARSER_BIG_BUFFER_SIZE 1000 +#define HTML_PARSER_BUFFER_SIZE 100 + +/* #define DEBUG */ +/* #define DEBUG_PUSH */ + +static int htmlOmittedDefaultValue = 1; + +xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, + xmlChar end, xmlChar end2, xmlChar end3); +static void htmlParseComment(htmlParserCtxtPtr ctxt); + +/************************************************************************ + * * + * Parser stacks related functions and macros * + * * + ************************************************************************/ + +/** + * htmlnamePush: + * @ctxt: an HTML parser context + * @value: the element name + * + * Pushes a new element name on top of the name stack + * + * Returns 0 in case of error, the index in the stack otherwise + */ +static int +htmlnamePush(htmlParserCtxtPtr ctxt, xmlChar * value) +{ + if (ctxt->nameNr >= ctxt->nameMax) { + ctxt->nameMax *= 2; + ctxt->nameTab = + (xmlChar * *)xmlRealloc(ctxt->nameTab, + ctxt->nameMax * + sizeof(ctxt->nameTab[0])); + if (ctxt->nameTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); + return (0); + } + } + ctxt->nameTab[ctxt->nameNr] = value; + ctxt->name = 
value; + return (ctxt->nameNr++); +} +/** + * htmlnamePop: + * @ctxt: an HTML parser context + * + * Pops the top element name from the name stack + * + * Returns the name just removed + */ +static xmlChar * +htmlnamePop(htmlParserCtxtPtr ctxt) +{ + xmlChar *ret; + + if (ctxt->nameNr <= 0) + return (0); + ctxt->nameNr--; + if (ctxt->nameNr < 0) + return (0); + if (ctxt->nameNr > 0) + ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; + else + ctxt->name = NULL; + ret = ctxt->nameTab[ctxt->nameNr]; + ctxt->nameTab[ctxt->nameNr] = 0; + return (ret); +} + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one need to make assumption on the context to use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled + * in ISO-Latin or UTF-8, and the current 16 bit value if compiled + * in UNICODE mode. This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR + * it should be used only to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * + * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding + * + * CURRENT Returns the current char value, with the full decoding of + * UTF-8 if we are using this mode. It returns an int. + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. 
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 */

/* All macros below expect a parser context variable named `ctxt' in scope. */

/* Uppercased byte at the current input position. */
#define UPPER (toupper(*ctxt->input->cur))

/* Advance the input by val bytes and count them in ctxt->nbChars. */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)

/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]

/* Uppercased byte located val positions after the current one. */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

/* Current input pointer. */
#define CUR_PTR ctxt->input->cur

#define SHRINK  xmlParserInputShrink(ctxt->input)

#define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

/* Byte at the current input position, as an int. */
#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
/* Step over one character via xmlNextChar and count it in ctxt->nbChars. */
#define NEXT xmlNextChar(ctxt),ctxt->nbChars++

/* Current byte, or -1 while a pending ctxt->token is set. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* NXT and CUR_PTR are defined a second time here with identical bodies. */
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur


/*
 * Advance by l bytes while keeping the line/column position up to date;
 * also clears ctxt->token and counts one character in ctxt->nbChars.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;		\
  } while (0)

/************
    \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

/* Current character through htmlCurrentChar; its byte length is stored in l. */
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v, of encoded length l, to buffer b at index i.
 * This expands to a bare if/else, so take care when using it as the body
 * of an unbraced if.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v)

/**
 * htmlCurrentChar:
 * @ctxt:  the HTML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * If the encoding is unspecified, in the case we find an ISO-Latin-1
 * char, then the encoding converter is plugged in automatically.
+ * + * Returns the current char value and its length + */ + +static int +htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { + if (ctxt->instate == XML_PARSER_EOF) + return(0); + + if (ctxt->token != 0) { + *len = 0; + return(ctxt->token); + } + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + const unsigned char *cur = ctxt->input->cur; + unsigned char c; + unsigned int val; + + c = *cur; + if (c & 0x80) { + if (cur[1] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + + if (cur[2] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (cur[3] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } else { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + if (!IS_CHAR(val)) { + ctxt->errNo = XML_ERR_INVALID_ENCODING; + if ((ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Char 0x%X out of allowed range\n", val); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + return(val); + } else { + /* 1-byte code */ + *len = 1; + return((int) *ctxt->input->cur); + } + } + /* + * Assume it's a fixed 
length encoding (1) with + * a compatible encoding for the ASCII set, since + * XML constructs only use < 128 chars + */ + *len = 1; + if ((int) *ctxt->input->cur < 0x80) + return((int) *ctxt->input->cur); + + /* + * Humm this is bad, do an automatic flow conversion + */ + xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(xmlCurrentChar(ctxt, len)); + +encoding_error: + /* + * If we detect an UTF8 error that probably mean that the + * input encoding didn't get properly advertized in the + * declaration header. Report the error and switch the encoding + * to ISO-Latin-1 (if you don't like this policy, just declare the + * encoding !) + */ + ctxt->errNo = XML_ERR_INVALID_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { + ctxt->sax->error(ctxt->userData, + "Input is not proper UTF-8, indicate encoding !\n"); + ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); + } + + ctxt->charset = XML_CHAR_ENCODING_8859_1; + *len = 1; + return((int) *ctxt->input->cur); +} + +/** + * htmlSkipBlankChars: + * @ctxt: the HTML parser context + * + * skip all blanks character found at that point in the input streams. 
+ * + * Returns the number of space chars skipped + */ + +static int +htmlSkipBlankChars(xmlParserCtxtPtr ctxt) { + int res = 0; + + while (IS_BLANK(*(ctxt->input->cur))) { + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { + xmlPopInput(ctxt); + } else { + if (*(ctxt->input->cur) == '\n') { + ctxt->input->line++; ctxt->input->col = 1; + } else ctxt->input->col++; + ctxt->input->cur++; + ctxt->nbChars++; + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + } + res++; + } + return(res); +} + + + +/************************************************************************ + * * + * The list of HTML elements and their properties * + * * + ************************************************************************/ + +/* + * Start Tag: 1 means the start tag can be ommited + * End Tag: 1 means the end tag can be ommited + * 2 means it's forbidden (empty elements) + * 3 means the tag is stylistic and should be closed easily + * Depr: this element is deprecated + * DTD: 1 means that this element is valid only in the Loose DTD + * 2 means that this element is valid only in the Frameset DTD + * + * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description + */ +static const htmlElemDesc +html40ElementTable[] = { +{ "a", 0, 0, 0, 0, 0, 0, 1, "anchor " }, +{ "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form" }, +{ "acronym", 0, 0, 0, 0, 0, 0, 1, "" }, +{ "address", 0, 0, 0, 0, 0, 0, 0, "information on author " }, +{ "applet", 0, 0, 0, 0, 1, 1, 2, "java applet " }, +{ "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area " }, +{ "b", 0, 3, 0, 0, 0, 0, 1, "bold text style" }, +{ "base", 0, 2, 2, 1, 0, 0, 0, "document base uri " }, +{ "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " }, +{ "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride " }, +{ "big", 0, 3, 0, 0, 0, 0, 1, "large text style" }, +{ "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation " }, +{ "body", 1, 1, 0, 0, 0, 0, 0, "document body " }, 
+{ "br", 0, 2, 2, 1, 0, 0, 1, "forced line break " }, +{ "button", 0, 0, 0, 0, 0, 0, 2, "push button " }, +{ "caption", 0, 0, 0, 0, 0, 0, 0, "table caption " }, +{ "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center " }, +{ "cite", 0, 0, 0, 0, 0, 0, 1, "citation" }, +{ "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment" }, +{ "col", 0, 2, 2, 1, 0, 0, 0, "table column " }, +{ "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group " }, +{ "dd", 0, 1, 0, 0, 0, 0, 0, "definition description " }, +{ "del", 0, 0, 0, 0, 0, 0, 2, "deleted text " }, +{ "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition" }, +{ "dir", 0, 0, 0, 0, 1, 1, 0, "directory list" }, +{ "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container"}, +{ "dl", 0, 0, 0, 0, 0, 0, 0, "definition list " }, +{ "dt", 0, 1, 0, 0, 0, 0, 0, "definition term " }, +{ "em", 0, 3, 0, 0, 0, 0, 1, "emphasis" }, +{ "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group " }, +{ "font", 0, 3, 0, 0, 1, 1, 1, "local change to font " }, +{ "form", 0, 0, 0, 0, 0, 0, 0, "interactive form " }, +{ "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " }, +{ "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" }, +{ "h1", 0, 0, 0, 0, 0, 0, 0, "heading " }, +{ "h2", 0, 0, 0, 0, 0, 0, 0, "heading " }, +{ "h3", 0, 0, 0, 0, 0, 0, 0, "heading " }, +{ "h4", 0, 0, 0, 0, 0, 0, 0, "heading " }, +{ "h5", 0, 0, 0, 0, 0, 0, 0, "heading " }, +{ "h6", 0, 0, 0, 0, 0, 0, 0, "heading " }, +{ "head", 1, 1, 0, 0, 0, 0, 0, "document head " }, +{ "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " }, +{ "html", 1, 1, 0, 0, 0, 0, 0, "document root element " }, +{ "i", 0, 3, 0, 0, 0, 0, 1, "italic text style" }, +{ "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow " }, +{ "img", 0, 2, 2, 1, 0, 0, 1, "embedded image " }, +{ "input", 0, 2, 2, 1, 0, 0, 1, "form control " }, +{ "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text" }, +{ "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt " }, +{ "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user" }, +{ "label", 0, 
0, 0, 0, 0, 0, 1, "form field label text " }, +{ "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend " }, +{ "li", 0, 1, 1, 0, 0, 0, 0, "list item " }, +{ "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link " }, +{ "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map " }, +{ "menu", 0, 0, 0, 0, 1, 1, 0, "menu list " }, +{ "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation " }, +{ "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering " }, +{ "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering " }, +{ "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object " }, +{ "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list " }, +{ "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group " }, +{ "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " }, +{ "p", 0, 1, 0, 0, 0, 0, 0, "paragraph " }, +{ "param", 0, 2, 2, 1, 0, 0, 0, "named property value " }, +{ "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text " }, +{ "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation " }, +{ "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style" }, +{ "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc." 
}, +{ "script", 0, 0, 0, 0, 0, 0, 2, "script statements " }, +{ "select", 0, 0, 0, 0, 0, 0, 1, "option selector " }, +{ "small", 0, 3, 0, 0, 0, 0, 1, "small text style" }, +{ "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container " }, +{ "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text" }, +{ "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis" }, +{ "style", 0, 0, 0, 0, 0, 0, 0, "style info " }, +{ "sub", 0, 3, 0, 0, 0, 0, 1, "subscript" }, +{ "sup", 0, 3, 0, 0, 0, 0, 1, "superscript " }, +{ "table", 0, 0, 0, 0, 0, 0, 0, " " }, +{ "tbody", 1, 0, 0, 0, 0, 0, 0, "table body " }, +{ "td", 0, 0, 0, 0, 0, 0, 0, "table data cell" }, +{ "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field " }, +{ "tfoot", 0, 1, 0, 0, 0, 0, 0, "table footer " }, +{ "th", 0, 1, 0, 0, 0, 0, 0, "table header cell" }, +{ "thead", 0, 1, 0, 0, 0, 0, 0, "table header " }, +{ "title", 0, 0, 0, 0, 0, 0, 0, "document title " }, +{ "tr", 0, 0, 0, 0, 0, 0, 0, "table row " }, +{ "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style" }, +{ "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style" }, +{ "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list " }, +{ "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument" }, +}; + +/* + * start tags that imply the end of current element + */ +static const char *htmlStartClose[] = { +"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6", + "dl", "ul", "ol", "menu", "dir", "address", "pre", + "listing", "xmp", "head", NULL, +"head", "p", NULL, +"title", "p", NULL, +"body", "head", "style", "link", "title", "p", NULL, +"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address", + "pre", "listing", "xmp", "head", "li", NULL, +"hr", "p", "head", NULL, +"h1", "p", "head", NULL, +"h2", "p", "head", NULL, +"h3", "p", "head", NULL, +"h4", "p", "head", NULL, +"h5", "p", "head", NULL, +"h6", "p", "head", NULL, +"dir", "p", "head", NULL, +"address", "p", "head", "ul", NULL, +"pre", "p", "head", "ul", NULL, +"listing", "p", "head", NULL, 
+"xmp", "p", "head", NULL, +"blockquote", "p", "head", NULL, +"dl", "p", "dt", "menu", "dir", "address", "pre", "listing", + "xmp", "head", NULL, +"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", + "head", "dd", NULL, +"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", + "head", "dt", NULL, +"ul", "p", "head", "ol", "menu", "dir", "address", "pre", + "listing", "xmp", NULL, +"ol", "p", "head", "ul", NULL, +"menu", "p", "head", "ul", NULL, +"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL, +"div", "p", "head", NULL, +"noscript", "p", "head", NULL, +"center", "font", "b", "i", "p", "head", NULL, +"a", "a", NULL, +"caption", "p", NULL, +"colgroup", "caption", "colgroup", "col", "p", NULL, +"col", "caption", "col", "p", NULL, +"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", + "listing", "xmp", "a", NULL, +"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, +"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, +"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, +"thead", "caption", "col", "colgroup", NULL, +"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", + "tbody", "p", NULL, +"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", + "tfoot", "tbody", "p", NULL, +"optgroup", "option", NULL, +"option", "option", NULL, +"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", + "pre", "listing", "xmp", "a", NULL, +NULL +}; + +/* + * The list of HTML elements which are supposed not to have + * CDATA content and where a p element will be implied + * + * TODO: extend that list by reading the HTML SGML DTD on + * implied paragraph + */ +static const char *htmlNoContentElements[] = { + "html", + "head", + "body", + NULL +}; + +/* + * The list of HTML attributes which are of content %Script; + * NOTE: when adding ones, check htmlIsScriptAttribute() since + * it assumes the name starts with 'on' + */ +static const char *htmlScriptAttributes[] = { + 
"onclick", + "ondblclick", + "onmousedown", + "onmouseup", + "onmouseover", + "onmousemove", + "onmouseout", + "onkeypress", + "onkeydown", + "onkeyup", + "onload", + "onunload", + "onfocus", + "onblur", + "onsubmit", + "onrest", + "onchange", + "onselect" +}; + +/* + * This table is used by the htmlparser to know what to do with + * broken html pages. By assigning different priorities to different + * elements the parser can decide how to handle extra endtags. + * Endtags are only allowed to close elements with lower or equal + * priority. + */ + +typedef struct { + const char *name; + int priority; +} elementPriority; + +static const elementPriority htmlEndPriority[] = { + {"div", 150}, + {"td", 160}, + {"th", 160}, + {"tr", 170}, + {"thead", 180}, + {"tbody", 180}, + {"tfoot", 180}, + {"table", 190}, + {"head", 200}, + {"body", 200}, + {"html", 220}, + {NULL, 100} /* Default priority */ +}; + +static const char** htmlStartCloseIndex[100]; +static int htmlStartCloseIndexinitialized = 0; + +/************************************************************************ + * * + * functions to handle HTML specific data * + * * + ************************************************************************/ + +/** + * htmlInitAutoClose: + * + * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. + * This is not reentrant. Call xmlInitParser() once before processing in + * case of use in multithreaded programs. 
+ */ +void +htmlInitAutoClose(void) { + int indx, i = 0; + + if (htmlStartCloseIndexinitialized) return; + + for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL; + indx = 0; + while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) { + htmlStartCloseIndex[indx++] = &htmlStartClose[i]; + while (htmlStartClose[i] != NULL) i++; + i++; + } + htmlStartCloseIndexinitialized = 1; +} + +/** + * htmlTagLookup: + * @tag: The tag name in lowercase + * + * Lookup the HTML tag in the ElementTable + * + * Returns the related htmlElemDescPtr or NULL if not found. + */ +const htmlElemDesc * +htmlTagLookup(const xmlChar *tag) { + unsigned int i; + + for (i = 0; i < (sizeof(html40ElementTable) / + sizeof(html40ElementTable[0]));i++) { + if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) + return((const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) (const htmlElemDescPtr) &html40ElementTable[i]); + } + return(NULL); +} + +/** + * htmlGetEndPriority: + * @name: The name of the element to look up the priority for. + * + * Return value: The "endtag" priority. + **/ +static int +htmlGetEndPriority (const xmlChar *name) { + int i = 0; + + while ((htmlEndPriority[i].name != NULL) && + (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) + i++; + + return(htmlEndPriority[i].priority); +} + +/** + * htmlCheckAutoClose: + * @newtag: The new tag name + * @oldtag: The old tag name + * + * Checks whether the new tag is one of the registered valid tags for + * closing old. + * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. + * + * Returns 0 if no, 1 if yes. 
+ */ +static int +htmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) { + int i, indx; + const char **closed = NULL; + + if (htmlStartCloseIndexinitialized == 0) htmlInitAutoClose(); + + /* inefficient, but not a big deal */ + for (indx = 0; indx < 100;indx++) { + closed = htmlStartCloseIndex[indx]; + if (closed == NULL) return(0); + if (xmlStrEqual(BAD_CAST *closed, newtag)) break; + } + + i = closed - htmlStartClose; + i++; + while (htmlStartClose[i] != NULL) { + if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) { + return(1); + } + i++; + } + return(0); +} + +/** + * htmlAutoCloseOnClose: + * @ctxt: an HTML parser context + * @newtag: The new tag name + * @force: force the tag closure + * + * The HTML DTD allows an ending tag to implicitly close other tags. + */ +static void +htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { + const htmlElemDesc * info; + xmlChar *oldname; + int i, priority; + +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr); + for (i = 0;i < ctxt->nameNr;i++) + xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]); +#endif + + priority = htmlGetEndPriority (newtag); + + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + + if (xmlStrEqual(newtag, ctxt->nameTab[i])) break; + /* + * A missplaced endtag can only close elements with lower + * or equal priority, so if we find an element with higher + * priority before we find an element with + * matching name, we just ignore this endtag + */ + if (htmlGetEndPriority (ctxt->nameTab[i]) > priority) return; + } + if (i < 0) return; + + while (!xmlStrEqual(newtag, ctxt->name)) { + info = htmlTagLookup(ctxt->name); + if ((info == NULL) || (info->endTag == 1)) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name); +#endif + } else if (info->endTag == 3) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting 
%s\n", newtag, ctxt->name); + +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", + newtag, ctxt->name); + ctxt->wellFormed = 0; + } + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnClose: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } +} + +/** + * htmlAutoCloseOnEnd: + * @ctxt: an HTML parser context + * + * Close all remaining tags at the end of the stream + */ +static void +htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt) { + xmlChar *oldname; + int i; + + if (ctxt->nameNr == 0) + return; +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Close of stack: %d elements\n", ctxt->nameNr); +#endif + + for (i = (ctxt->nameNr - 1);i >= 0;i--) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnEnd: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } +} + +/** + * htmlAutoClose: + * @ctxt: an HTML parser context + * @newtag: The new tag name or NULL + * + * The HTML DTD allows a tag to implicitly close other tags. + * The list is kept in htmlStartClose array. This function is + * called when a new tag has been detected and generates the + * appropriates closes if possible/needed. 
+ * If newtag is NULL this mean we are at the end of the resource + * and we should check + */ +static void +htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { + xmlChar *oldname; + while ((newtag != NULL) && (ctxt->name != NULL) && + (htmlCheckAutoClose(newtag, ctxt->name))) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: %s closes %s\n", newtag, ctxt->name); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } + if (newtag == NULL) { + htmlAutoCloseOnEnd(ctxt); + return; + } + while ((newtag == NULL) && (ctxt->name != NULL) && + ((xmlStrEqual(ctxt->name, BAD_CAST"head")) || + (xmlStrEqual(ctxt->name, BAD_CAST"body")) || + (xmlStrEqual(ctxt->name, BAD_CAST"html")))) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: EOF closes %s\n", ctxt->name); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } + +} + +/** + * htmlAutoCloseTag: + * @doc: the HTML document + * @name: The tag name + * @elem: the HTML element + * + * The HTML DTD allows a tag to implicitly close other tags. + * The list is kept in htmlStartClose array. This function checks + * if the element or one of it's children would autoclose the + * given tag. 
+ * + * Returns 1 if autoclose, 0 otherwise + */ +int +htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) { + htmlNodePtr child; + + if (elem == NULL) return(1); + if (xmlStrEqual(name, elem->name)) return(0); + if (htmlCheckAutoClose(elem->name, name)) return(1); + child = elem->children; + while (child != NULL) { + if (htmlAutoCloseTag(doc, name, child)) return(1); + child = child->next; + } + return(0); +} + +/** + * htmlIsAutoClosed: + * @doc: the HTML document + * @elem: the HTML element + * + * The HTML DTD allows a tag to implicitly close other tags. + * The list is kept in htmlStartClose array. This function checks + * if a tag is autoclosed by one of it's child + * + * Returns 1 if autoclosed, 0 otherwise + */ +int +htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) { + htmlNodePtr child; + + if (elem == NULL) return(1); + child = elem->children; + while (child != NULL) { + if (htmlAutoCloseTag(doc, elem->name, child)) return(1); + child = child->next; + } + return(0); +} + +/** + * htmlCheckImplied: + * @ctxt: an HTML parser context + * @newtag: The new tag name + * + * The HTML DTD allows a tag to exists only implicitly + * called when a new tag has been detected and generates the + * appropriates implicit tags if missing + */ +static void +htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { + if (!htmlOmittedDefaultValue) + return; + if (xmlStrEqual(newtag, BAD_CAST"html")) + return; + if (ctxt->nameNr <= 0) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Implied element html: pushed html\n"); +#endif + htmlnamePush(ctxt, xmlStrdup(BAD_CAST"html")); + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); + } + if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head"))) + return; + if ((ctxt->nameNr <= 1) && + ((xmlStrEqual(newtag, BAD_CAST"script")) || + (xmlStrEqual(newtag, BAD_CAST"style")) || + 
(xmlStrEqual(newtag, BAD_CAST"meta")) || + (xmlStrEqual(newtag, BAD_CAST"link")) || + (xmlStrEqual(newtag, BAD_CAST"title")) || + (xmlStrEqual(newtag, BAD_CAST"base")))) { + /* + * dropped OBJECT ... i you put it first BODY will be + * assumed ! + */ +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Implied element head: pushed head\n"); +#endif + htmlnamePush(ctxt, xmlStrdup(BAD_CAST"head")); + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); + } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && + (!xmlStrEqual(newtag, BAD_CAST"frame")) && + (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { + int i; + for (i = 0;i < ctxt->nameNr;i++) { + if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { + return; + } + if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { + return; + } + } + +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Implied element body: pushed body\n"); +#endif + htmlnamePush(ctxt, xmlStrdup(BAD_CAST"body")); + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); + } +} + +/** + * htmlCheckParagraph + * @ctxt: an HTML parser context + * + * Check whether a p element need to be implied before inserting + * characters in the current element. + * + * Returns 1 if a paragraph has been inserted, 0 if not and -1 + * in case of error. 
+ */ + +static int +htmlCheckParagraph(htmlParserCtxtPtr ctxt) { + const xmlChar *tag; + int i; + + if (ctxt == NULL) + return(-1); + tag = ctxt->name; + if (tag == NULL) { + htmlAutoClose(ctxt, BAD_CAST"p"); + htmlCheckImplied(ctxt, BAD_CAST"p"); + htmlnamePush(ctxt, xmlStrdup(BAD_CAST"p")); + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL); + return(1); + } + if (!htmlOmittedDefaultValue) + return(0); + for (i = 0; htmlNoContentElements[i] != NULL; i++) { + if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n"); +#endif + htmlAutoClose(ctxt, BAD_CAST"p"); + htmlCheckImplied(ctxt, BAD_CAST"p"); + htmlnamePush(ctxt, xmlStrdup(BAD_CAST"p")); + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL); + return(1); + } + } + return(0); +} + +/** + * htmlIsScriptAttribute: + * @name: an attribute name + * + * Check if an attribute is of content type Script + * + * Returns 1 is the attribute is a script 0 otherwise + */ +int +htmlIsScriptAttribute(const xmlChar *name) { + unsigned int i; + + if (name == NULL) + return(0); + /* + * all script attributes start with 'on' + */ + if ((name[0] != 'o') || (name[1] != 'n')) + return(0); + for (i = 0; + i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]); + i++) { + if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i])) + return(1); + } + return(0); +} + +/************************************************************************ + * * + * The list of HTML predefined entities * + * * + ************************************************************************/ + + +static const htmlEntityDesc html40EntitiesTable[] = { +/* + * the 4 absolute ones, plus apostrophe. 
+ */ +{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, +{ 38, "amp", "ampersand, U+0026 ISOnum" }, +{ 39, "apos", "single quote" }, +{ 60, "lt", "less-than sign, U+003C ISOnum" }, +{ 62, "gt", "greater-than sign, U+003E ISOnum" }, + +/* + * A bunch still in the 128-255 range + * Replacing them depend really on the charset used. + */ +{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" }, +{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" }, +{ 162, "cent", "cent sign, U+00A2 ISOnum" }, +{ 163, "pound","pound sign, U+00A3 ISOnum" }, +{ 164, "curren","currency sign, U+00A4 ISOnum" }, +{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" }, +{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" }, +{ 167, "sect", "section sign, U+00A7 ISOnum" }, +{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" }, +{ 169, "copy", "copyright sign, U+00A9 ISOnum" }, +{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" }, +{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" }, +{ 172, "not", "not sign, U+00AC ISOnum" }, +{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" }, +{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" }, +{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" }, +{ 176, "deg", "degree sign, U+00B0 ISOnum" }, +{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" }, +{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" }, +{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" }, +{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" }, +{ 181, "micro","micro sign, U+00B5 ISOnum" }, +{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" }, +{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" }, +{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" }, +{ 185, "sup1", 
"superscript one = superscript digit one, U+00B9 ISOnum" }, +{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" }, +{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" }, +{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" }, +{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" }, +{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" }, +{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" }, +{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" }, +{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" }, +{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" }, +{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" }, +{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" }, +{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" }, +{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" }, +{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" }, +{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" }, +{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" }, +{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" }, +{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" }, +{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" }, +{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" }, +{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" }, +{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" }, +{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" }, +{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" }, +{ 210, "Ograve","latin capital 
letter O with grave, U+00D2 ISOlat1" }, +{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" }, +{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" }, +{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" }, +{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" }, +{ 215, "times","multiplication sign, U+00D7 ISOnum" }, +{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" }, +{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" }, +{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" }, +{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" }, +{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" }, +{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" }, +{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" }, +{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" }, +{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" }, +{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" }, +{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" }, +{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" }, +{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" }, +{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" }, +{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" }, +{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" }, +{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" }, +{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" }, +{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" }, +{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" }, +{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" }, 
+{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" }, +{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" }, +{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" }, +{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" }, +{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" }, +{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" }, +{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" }, +{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" }, +{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" }, +{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" }, +{ 247, "divide","division sign, U+00F7 ISOnum" }, +{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" }, +{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" }, +{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" }, +{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" }, +{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" }, +{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" }, +{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" }, +{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" }, + +{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" }, +{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" }, +{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" }, +{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" }, +{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" }, + +/* + * Anything below should really be kept as entities references + */ +{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" }, + +{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" }, +{ 732, "tilde","small tilde, U+02DC ISOdia" }, + +{ 913, "Alpha","greek 
capital letter alpha, U+0391" }, +{ 914, "Beta", "greek capital letter beta, U+0392" }, +{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" }, +{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" }, +{ 917, "Epsilon","greek capital letter epsilon, U+0395" }, +{ 918, "Zeta", "greek capital letter zeta, U+0396" }, +{ 919, "Eta", "greek capital letter eta, U+0397" }, +{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" }, +{ 921, "Iota", "greek capital letter iota, U+0399" }, +{ 922, "Kappa","greek capital letter kappa, U+039A" }, +{ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" }, +{ 924, "Mu", "greek capital letter mu, U+039C" }, +{ 925, "Nu", "greek capital letter nu, U+039D" }, +{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" }, +{ 927, "Omicron","greek capital letter omicron, U+039F" }, +{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" }, +{ 929, "Rho", "greek capital letter rho, U+03A1" }, +{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" }, +{ 932, "Tau", "greek capital letter tau, U+03A4" }, +{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" }, +{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" }, +{ 935, "Chi", "greek capital letter chi, U+03A7" }, +{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" }, +{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" }, + +{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" }, +{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" }, +{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" }, +{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" }, +{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" }, +{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" }, +{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" }, +{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" }, +{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" }, +{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" 
}, +{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" }, +{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" }, +{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" }, +{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" }, +{ 959, "omicron","greek small letter omicron, U+03BF NEW" }, +{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" }, +{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" }, +{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" }, +{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" }, +{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" }, +{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" }, +{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" }, +{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" }, +{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" }, +{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" }, +{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" }, +{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" }, +{ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" }, + +{ 8194, "ensp", "en space, U+2002 ISOpub" }, +{ 8195, "emsp", "em space, U+2003 ISOpub" }, +{ 8201, "thinsp","thin space, U+2009 ISOpub" }, +{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" }, +{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" }, +{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" }, +{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" }, +{ 8211, "ndash","en dash, U+2013 ISOpub" }, +{ 8212, "mdash","em dash, U+2014 ISOpub" }, +{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" }, +{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" }, +{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" }, +{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" }, +{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" }, +{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" }, +{ 8224, "dagger","dagger, U+2020 ISOpub" 
}, +{ 8225, "Dagger","double dagger, U+2021 ISOpub" }, + +{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" }, +{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" }, + +{ 8240, "permil","per mille sign, U+2030 ISOtech" }, + +{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" }, +{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" }, + +{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" }, +{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" }, + +{ 8254, "oline","overline = spacing overscore, U+203E NEW" }, +{ 8260, "frasl","fraction slash, U+2044 NEW" }, + +{ 8364, "euro", "euro sign, U+20AC NEW" }, + +{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" }, +{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" }, +{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" }, +{ 8482, "trade","trade mark sign, U+2122 ISOnum" }, +{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" }, +{ 8592, "larr", "leftwards arrow, U+2190 ISOnum" }, +{ 8593, "uarr", "upwards arrow, U+2191 ISOnum" }, +{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" }, +{ 8595, "darr", "downwards arrow, U+2193 ISOnum" }, +{ 8596, "harr", "left right arrow, U+2194 ISOamsa" }, +{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" }, +{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" }, +{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" }, +{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" }, +{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" }, +{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" }, + +{ 8704, "forall","for all, U+2200 ISOtech" }, +{ 8706, "part", "partial differential, U+2202 ISOtech" }, +{ 8707, "exist","there exists, U+2203 ISOtech" }, +{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" }, +{ 8711, 
"nabla","nabla = backward difference, U+2207 ISOtech" }, +{ 8712, "isin", "element of, U+2208 ISOtech" }, +{ 8713, "notin","not an element of, U+2209 ISOtech" }, +{ 8715, "ni", "contains as member, U+220B ISOtech" }, +{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" }, +{ 8721, "sum", "n-ary summation, U+2211 ISOamsb" }, +{ 8722, "minus","minus sign, U+2212 ISOtech" }, +{ 8727, "lowast","asterisk operator, U+2217 ISOtech" }, +{ 8730, "radic","square root = radical sign, U+221A ISOtech" }, +{ 8733, "prop", "proportional to, U+221D ISOtech" }, +{ 8734, "infin","infinity, U+221E ISOtech" }, +{ 8736, "ang", "angle, U+2220 ISOamso" }, +{ 8743, "and", "logical and = wedge, U+2227 ISOtech" }, +{ 8744, "or", "logical or = vee, U+2228 ISOtech" }, +{ 8745, "cap", "intersection = cap, U+2229 ISOtech" }, +{ 8746, "cup", "union = cup, U+222A ISOtech" }, +{ 8747, "int", "integral, U+222B ISOtech" }, +{ 8756, "there4","therefore, U+2234 ISOtech" }, +{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" }, +{ 8773, "cong", "approximately equal to, U+2245 ISOtech" }, +{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" }, +{ 8800, "ne", "not equal to, U+2260 ISOtech" }, +{ 8801, "equiv","identical to, U+2261 ISOtech" }, +{ 8804, "le", "less-than or equal to, U+2264 ISOtech" }, +{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" }, +{ 8834, "sub", "subset of, U+2282 ISOtech" }, +{ 8835, "sup", "superset of, U+2283 ISOtech" }, +{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" }, +{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" }, +{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" }, +{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" }, +{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" }, +{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" }, +{ 8901, "sdot", "dot operator, U+22C5 ISOamsb" }, +{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" }, +{ 8969, 
"rceil","right ceiling, U+2309 ISOamsc" }, +{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" }, +{ 8971, "rfloor","right floor, U+230B ISOamsc" }, +{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" }, +{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" }, +{ 9674, "loz", "lozenge, U+25CA ISOpub" }, + +{ 9824, "spades","black spade suit, U+2660 ISOpub" }, +{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" }, +{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" }, +{ 9830, "diams","black diamond suit, U+2666 ISOpub" }, + +}; + +/************************************************************************ + * * + * Commodity functions to handle entities * + * * + ************************************************************************/ + +/* + * Macro used to grow the current buffer. + */ +#define growBuffer(buffer) { \ + buffer##_size *= 2; \ + buffer = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + if (buffer == NULL) { \ + xmlGenericError(xmlGenericErrorContext, "realloc failed\n"); \ + return(NULL); \ + } \ +} + +/** + * htmlEntityLookup: + * @name: the entity name + * + * Lookup the given entity in EntitiesTable + * + * TODO: the linear scan is really ugly, an hash table is really needed. + * + * Returns the associated htmlEntityDescPtr if found, NULL otherwise. + */ +const htmlEntityDesc * +htmlEntityLookup(const xmlChar *name) { + unsigned int i; + + for (i = 0;i < (sizeof(html40EntitiesTable)/ + sizeof(html40EntitiesTable[0]));i++) { + if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name); +#endif + return((const htmlEntityDescPtr) &html40EntitiesTable[i]); + } + } + return(NULL); +} + +/** + * htmlEntityValueLookup: + * @value: the entity's unicode value + * + * Lookup the given entity in EntitiesTable + * + * TODO: the linear scan is really ugly, an hash table is really needed. 
+ * + * Returns the associated htmlEntityDescPtr if found, NULL otherwise. + */ +const htmlEntityDesc * +htmlEntityValueLookup(unsigned int value) { + unsigned int i; +#ifdef DEBUG + unsigned int lv = 0; +#endif + + for (i = 0;i < (sizeof(html40EntitiesTable)/ + sizeof(html40EntitiesTable[0]));i++) { + if (html40EntitiesTable[i].value >= value) { + if (html40EntitiesTable[i].value > value) + break; +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", html40EntitiesTable[i].name); +#endif + return((const htmlEntityDescPtr) &html40EntitiesTable[i]); + } +#ifdef DEBUG + if (lv > html40EntitiesTable[i].value) { + xmlGenericError(xmlGenericErrorContext, + "html40EntitiesTable[] is not sorted (%d > %d)!\n", + lv, html40EntitiesTable[i].value); + } + lv = html40EntitiesTable[i].value; +#endif + } + return(NULL); +} + +/** + * UTF8ToHtml: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * plus HTML entities block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. 
+ */ +int +UTF8ToHtml(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen) { + const unsigned char* processed = in; + const unsigned char* outend; + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend; + unsigned int c, d; + int trailing; + + if (in == NULL) { + /* + * initialization nothing to do + */ + *outlen = 0; + *inlen = 0; + return(0); + } + inend = in + (*inlen); + outend = out + (*outlen); + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) + break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x80) { + if (out + 1 >= outend) + break; + *out++ = c; + } else { + int len; + const htmlEntityDesc * ent; + + /* + * Try to lookup a predefined HTML entity for it + */ + + ent = htmlEntityValueLookup(c); + if (ent == NULL) { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + len = strlen(ent->name); + if (out + 2 + len >= outend) + break; + *out++ = '&'; + memcpy(out, ent->name, len); + out += len; + *out++ = ';'; + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + +/** + * htmlEncodeEntities: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * 
@quoteChar: the quote character to escape (' or ") or zero. + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * plus HTML entities block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. + */ +int +htmlEncodeEntities(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen, int quoteChar) { + const unsigned char* processed = in; + const unsigned char* outend = out + (*outlen); + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend = in + (*inlen); + unsigned int c, d; + int trailing; + + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) + break; + + while (trailing--) { + if (((d= *in++) & 0xC0) != 0x80) { + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if ((c < 0x80) && (c != (unsigned int) quoteChar) && + (c != '&') && (c != '<') && (c != '>')) { + if (out >= outend) + break; + *out++ = c; + } else { + const htmlEntityDesc * ent; + const char *cp; + char nbuf[16]; + int len; + + /* + * Try to lookup a predefined HTML entity for it + */ + ent = htmlEntityValueLookup(c); + if (ent == NULL) { + snprintf(nbuf, sizeof(nbuf), "#%u", c); + cp = nbuf; + } + else + cp = ent->name; 
+ len = strlen(cp); + if (out + 2 + len > outend) + break; + *out++ = '&'; + memcpy(out, cp, len); + out += len; + *out++ = ';'; + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + +/** + * htmlDecodeEntities: + * @ctxt: the parser context + * @len: the len to decode (in bytes !), -1 for no size limit + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * Substitute the HTML entities by their value + * + * DEPRECATED !!!! + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +htmlDecodeEntities(htmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, + xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "htmlDecodeEntities() deprecated function reached\n"); + deprecated = 1; + } + return(NULL); +} + +/************************************************************************ + * * + * Commodity functions to handle streams * + * * + ************************************************************************/ + +/** + * htmlNewInputStream: + * @ctxt: an HTML parser context + * + * Create a new input stream structure + * Returns the new input stream or NULL + */ +static htmlParserInputPtr +htmlNewInputStream(htmlParserCtxtPtr ctxt) { + htmlParserInputPtr input; + + input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput)); + if (input == NULL) { + ctxt->errNo = XML_ERR_NO_MEMORY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "malloc: couldn't allocate a new input stream\n"); + return(NULL); + } + memset(input, 0, sizeof(htmlParserInput)); + input->filename = NULL; + input->directory = NULL; + input->base = NULL; + input->cur = NULL; + input->buf = NULL; + input->line = 1; + 
input->col = 1; + input->buf = NULL; + input->free = NULL; + input->version = NULL; + input->consumed = 0; + input->length = 0; + return(input); +} + + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? * + * * + ************************************************************************/ +/* + * all tags allowing pc data from the html 4.01 loose dtd + * NOTE: it might be more apropriate to integrate this information + * into the html40ElementTable array but I don't want to risk any + * binary incomptibility + */ +static const char *allowPCData[] = { + "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big", + "blockquote", "body", "button", "caption", "center", "cite", "code", + "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2", + "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend", + "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp", + "small", "span", "strike", "strong", "td", "th", "tt", "u", "var" +}; + +/** + * areBlanks: + * @ctxt: an HTML parser context + * @str: a xmlChar * + * @len: the size of @str + * + * Is this a sequence of blank chars that one can ignore ? + * + * Returns 1 if ignorable 0 otherwise. + */ + +static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) { + unsigned int i; + int j; + xmlNodePtr lastChild; + + for (j = 0;j < len;j++) + if (!(IS_BLANK(str[j]))) return(0); + + if (CUR == 0) return(1); + if (CUR != '<') return(0); + if (ctxt->name == NULL) + return(1); + if (xmlStrEqual(ctxt->name, BAD_CAST"html")) + return(1); + if (xmlStrEqual(ctxt->name, BAD_CAST"head")) + return(1); + if (xmlStrEqual(ctxt->name, BAD_CAST"body")) + return(1); + if (ctxt->node == NULL) return(0); + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if ((ctxt->node->type != XML_ELEMENT_NODE) && + (ctxt->node->content != NULL)) return(0); + /* keep ws in constructs like ...<b> </b>... 
+ for all tags "b" allowing PCDATA */ + for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { + if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) { + return(0); + } + } + } else if (xmlNodeIsText(lastChild)) { + return(0); + } else { + /* keep ws in constructs like <p><b>xy</b> <i>z</i><p> + for all tags "p" allowing PCDATA */ + for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { + if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { + return(0); + } + } + } + return(1); +} + +/** + * htmlNewDocNoDtD: + * @URI: URI for the dtd, or NULL + * @ExternalID: the external ID of the DTD, or NULL + * + * Creates a new HTML document without a DTD node if @URI and @ExternalID + * are NULL + * + * Returns a new document, do not initialize the DTD if not provided + */ +htmlDocPtr +htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { + xmlDocPtr cur; + + /* + * Allocate a new document and fill the fields. + */ + cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNewDocNoDtD : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlDoc)); + + cur->type = XML_HTML_DOCUMENT_NODE; + cur->version = NULL; + cur->intSubset = NULL; + cur->doc = cur; + cur->name = NULL; + cur->children = NULL; + cur->extSubset = NULL; + cur->oldNs = NULL; + cur->encoding = NULL; + cur->standalone = 1; + cur->compression = 0; + cur->ids = NULL; + cur->refs = NULL; + cur->_private = NULL; + if ((ExternalID != NULL) || + (URI != NULL)) + xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI); + return(cur); +} + +/** + * htmlNewDoc: + * @URI: URI for the dtd, or NULL + * @ExternalID: the external ID of the DTD, or NULL + * + * Creates a new HTML document + * + * Returns a new document + */ +htmlDocPtr +htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) { + if ((URI == NULL) && (ExternalID == NULL)) + return(htmlNewDocNoDtD( + BAD_CAST 
"http://www.w3.org/TR/REC-html40/loose.dtd", + BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN")); + + return(htmlNewDocNoDtD(URI, ExternalID)); +} + + +/************************************************************************ + * * + * The parser itself * + * Relates to http://www.w3.org/TR/html40 * + * * + ************************************************************************/ + +/************************************************************************ + * * + * The parser itself * + * * + ************************************************************************/ + +/** + * htmlParseHTMLName: + * @ctxt: an HTML parser context + * + * parse an HTML tag or attribute name, note that we convert it to lowercase + * since HTML names are not case-sensitive. + * + * Returns the Tag Name parsed or NULL + */ + +static xmlChar * +htmlParseHTMLName(htmlParserCtxtPtr ctxt) { + xmlChar *ret = NULL; + int i = 0; + xmlChar loc[HTML_PARSER_BUFFER_SIZE]; + + if (!IS_LETTER(CUR) && (CUR != '_') && + (CUR != ':')) return(NULL); + + while ((i < HTML_PARSER_BUFFER_SIZE) && + ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == ':') || (CUR == '-') || (CUR == '_'))) { + if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; + else loc[i] = CUR; + i++; + + NEXT; + } + + ret = xmlStrndup(loc, i); + + return(ret); +} + +/** + * htmlParseName: + * @ctxt: an HTML parser context + * + * parse an HTML name, this routine is case sensitive. 
+ * + * Returns the Name parsed or NULL + */ + +static xmlChar * +htmlParseName(htmlParserCtxtPtr ctxt) { + xmlChar buf[HTML_MAX_NAMELEN]; + int len = 0; + + GROW; + if (!IS_LETTER(CUR) && (CUR != '_')) { + return(NULL); + } + + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || (CUR == ':') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) { + buf[len++] = CUR; + NEXT; + if (len >= HTML_MAX_NAMELEN) { + xmlGenericError(xmlGenericErrorContext, + "htmlParseName: reached HTML_MAX_NAMELEN limit\n"); + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || (CUR == ':') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) + NEXT; + break; + } + } + return(xmlStrndup(buf, len)); +} + +/** + * htmlParseHTMLAttribute: + * @ctxt: an HTML parser context + * @stop: a char stop value + * + * parse an HTML attribute value till the stop (quote), if + * stop is 0 then it stops at the first space + * + * Returns the attribute parsed or NULL + */ + +static xmlChar * +htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { + xmlChar *buffer = NULL; + int buffer_size = 0; + xmlChar *out = NULL; + xmlChar *name = NULL; + + xmlChar *cur = NULL; + const htmlEntityDesc * ent; + + /* + * allocate a translation buffer. 
+ */ + buffer_size = HTML_PARSER_BUFFER_SIZE; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlParseHTMLAttribute: malloc failed\n"); + return(NULL); + } + out = buffer; + + /* + * Ok loop until we reach one of the ending chars + */ + while ((CUR != 0) && (CUR != stop)) { + if ((stop == 0) && (CUR == '>')) break; + if ((stop == 0) && (IS_BLANK(CUR))) break; + if (CUR == '&') { + if (NXT(1) == '#') { + unsigned int c; + int bits; + + c = htmlParseCharRef(ctxt); + if (c < 0x80) + { *out++ = c; bits= -6; } + else if (c < 0x800) + { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + *out++ = ((c >> bits) & 0x3F) | 0x80; + } + + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + } else { + ent = htmlParseEntityRef(ctxt, &name); + if (name == NULL) { + *out++ = '&'; + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + } else if (ent == NULL) { + *out++ = '&'; + cur = name; + while (*cur != 0) { + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + *out++ = *cur++; + } + xmlFree(name); + } else { + unsigned int c; + int bits; + + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + c = (xmlChar)ent->value; + if (c < 0x80) + { *out++ = c; bits= -6; } + else if (c < 0x800) + { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + *out++ = ((c >> bits) & 0x3F) | 0x80; + } + xmlFree(name); + } + } + } 
else { + unsigned int c; + int bits, l; + + if (out - buffer > buffer_size - 100) { + int indx = out - buffer; + + growBuffer(buffer); + out = &buffer[indx]; + } + c = CUR_CHAR(l); + if (c < 0x80) + { *out++ = c; bits= -6; } + else if (c < 0x800) + { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + *out++ = ((c >> bits) & 0x3F) | 0x80; + } + NEXT; + } + } + *out++ = 0; + return(buffer); +} + +/** + * htmlParseEntityRef: + * @ctxt: an HTML parser context + * @str: location to store the entity name + * + * parse an HTML ENTITY references + * + * [68] EntityRef ::= '&' Name ';' + * + * Returns the associated htmlEntityDescPtr if found, or NULL otherwise, + * if non-NULL *str will have to be freed by the caller. + */ +const htmlEntityDesc * +htmlParseEntityRef(htmlParserCtxtPtr ctxt, xmlChar **str) { + xmlChar *name; + const htmlEntityDesc * ent = NULL; + *str = NULL; + + if (CUR == '&') { + NEXT; + name = htmlParseName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "htmlParseEntityRef: no name\n"); + ctxt->wellFormed = 0; + } else { + GROW; + if (CUR == ';') { + *str = name; + + /* + * Lookup the entity in the table. + */ + ent = htmlEntityLookup(name); + if (ent != NULL) /* OK that's ugly !!! */ + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseEntityRef: expecting ';'\n"); + *str = name; + } + } + } + return(ent); +} + +/** + * htmlParseAttValue: + * @ctxt: an HTML parser context + * + * parse a value for an attribute + * Note: the parser won't do substitution of entities here, this + * will be handled later in xmlStringGetNodeList, unless it was + * asked for ctxt->replaceEntities != 0 + * + * Returns the AttValue parsed or NULL. 
+ */ + +static xmlChar * +htmlParseAttValue(htmlParserCtxtPtr ctxt) { + xmlChar *ret = NULL; + + if (CUR == '"') { + NEXT; + ret = htmlParseHTMLAttribute(ctxt, '"'); + if (CUR != '"') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); + ctxt->wellFormed = 0; + } else + NEXT; + } else if (CUR == '\'') { + NEXT; + ret = htmlParseHTMLAttribute(ctxt, '\''); + if (CUR != '\'') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); + ctxt->wellFormed = 0; + } else + NEXT; + } else { + /* + * That's an HTMLism, the attribute value may not be quoted + */ + ret = htmlParseHTMLAttribute(ctxt, 0); + if (ret == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: no value found\n"); + ctxt->wellFormed = 0; + } + } + return(ret); +} + +/** + * htmlParseSystemLiteral: + * @ctxt: an HTML parser context + * + * parse an HTML Literal + * + * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") + * + * Returns the SystemLiteral parsed or NULL + */ + +static xmlChar * +htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { + const xmlChar *q; + xmlChar *ret = NULL; + + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) + NEXT; + if (!IS_CHAR(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_CHAR(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + 
ctxt->sax->error(ctxt->userData, + "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + } + + return(ret); +} + +/** + * htmlParsePubidLiteral: + * @ctxt: an HTML parser context + * + * parse an HTML public literal + * + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + * + * Returns the PubidLiteral parsed or NULL. + */ + +static xmlChar * +htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { + const xmlChar *q; + xmlChar *ret = NULL; + /* + * Name ::= (Letter | '_') (NameChar)* + */ + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while (IS_PUBIDCHAR(CUR)) NEXT; + if (CUR != '"') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_LETTER(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_LETTER(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + } + + return(ret); +} + +/** + * htmlParseScript: + * @ctxt: an HTML parser context + * + * parse the content of an HTML SCRIPT or STYLE element + * http://www.w3.org/TR/html4/sgml/dtd.html#Script + * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet + * http://www.w3.org/TR/html4/types.html#type-script + * http://www.w3.org/TR/html4/types.html#h-6.15 + * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1 + * + * Script data ( %Script; in the DTD) can be the content of the SCRIPT + * element and the value of intrinsic event attributes. User agents must + * not evaluate script data as HTML markup but instead must pass it on as + * data to a script engine. 
+ * NOTES: + * - The content is passed like CDATA + * - the attributes for style and scripting "onXXX" are also described + * as CDATA but SGML allows entities references in attributes so their + * processing is identical as other attributes + */ +static void +htmlParseScript(htmlParserCtxtPtr ctxt) { + xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 1]; + int nbchar = 0; + xmlChar cur; + + SHRINK; + cur = CUR; + while (IS_CHAR(cur)) { + if ((cur == '<') && (NXT(1) == '!') && (NXT(2) == '-') && + (NXT(3) == '-')) { + if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock!= NULL) { + /* + * Insert as CDATA, which is the same as HTML_PRESERVE_NODE + */ + ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); + } + } + nbchar = 0; + htmlParseComment(ctxt); + cur = CUR; + continue; + } else if ((cur == '<') && (NXT(1) == '/')) { + /* + * One should break here, the specification is clear: + * Authors should therefore escape "</" within the content. + * Escape mechanisms are specific to each scripting or + * style sheet language. 
+ */ + if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) || + ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) + break; /* while */ + } + buf[nbchar++] = cur; + if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { + if (ctxt->sax->cdataBlock!= NULL) { + /* + * Insert as CDATA, which is the same as HTML_PRESERVE_NODE + */ + ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); + } + nbchar = 0; + } + NEXT; + cur = CUR; + } + if (!(IS_CHAR(cur))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Invalid char in CDATA 0x%X\n", cur); + ctxt->wellFormed = 0; + NEXT; + } + + if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock!= NULL) { + /* + * Insert as CDATA, which is the same as HTML_PRESERVE_NODE + */ + ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); + } + } +} + + +/** + * htmlParseCharData: + * @ctxt: an HTML parser context + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. + * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +static void +htmlParseCharData(htmlParserCtxtPtr ctxt) { + xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; + int nbchar = 0; + int cur, l; + + SHRINK; + cur = CUR_CHAR(l); + while (((cur != '<') || (ctxt->token == '<')) && + ((cur != '&') || (ctxt->token == '&')) && + (IS_CHAR(cur))) { + COPY_BUF(l,buf,nbchar,cur); + if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { + /* + * Ok the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + buf, nbchar); + } else { + htmlCheckParagraph(ctxt); + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + nbchar = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + } + if (nbchar != 0) { + /* + * Ok the segment is to be consumed as chars. 
+ */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); + } else { + htmlCheckParagraph(ctxt); + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + } else { + /* + * Loop detection + */ + if (cur == 0) + ctxt->instate = XML_PARSER_EOF; + } +} + +/** + * htmlParseExternalID: + * @ctxt: an HTML parser context + * @publicID: a xmlChar** receiving PubidLiteral + * + * Parse an External ID or a Public ID + * + * [75] ExternalID ::= 'SYSTEM' S SystemLiteral + * | 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [83] PublicID ::= 'PUBLIC' S PubidLiteral + * + * Returns the function returns SystemLiteral and in the second + * case publicID receives PubidLiteral, is strict is off + * it is possible to return NULL and have publicID set. + */ + +static xmlChar * +htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) { + xmlChar *URI = NULL; + + if ((UPPER == 'S') && (UPP(1) == 'Y') && + (UPP(2) == 'S') && (UPP(3) == 'T') && + (UPP(4) == 'E') && (UPP(5) == 'M')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'SYSTEM'\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + URI = htmlParseSystemLiteral(ctxt); + if (URI == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseExternalID: SYSTEM, no URI\n"); + ctxt->wellFormed = 0; + } + } else if ((UPPER == 'P') && (UPP(1) == 'U') && + (UPP(2) == 'B') && (UPP(3) == 'L') && + (UPP(4) == 'I') && (UPP(5) == 'C')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'PUBLIC'\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + *publicID = htmlParsePubidLiteral(ctxt); + if (*publicID 
== NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseExternalID: PUBLIC, no Public Identifier\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + if ((CUR == '"') || (CUR == '\'')) { + URI = htmlParseSystemLiteral(ctxt); + } + } + return(URI); +} + +/** + * htmlParseComment: + * @ctxt: an HTML parser context + * + * Parse an XML (SGML) comment <!-- .... --> + * + * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' + */ +static void +htmlParseComment(htmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len; + int size = HTML_PARSER_BUFFER_SIZE; + int q, ql; + int r, rl; + int cur, l; + xmlParserInputState state; + + /* + * Check that there is a comment right here. + */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + SHRINK; + SKIP(4); + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + q = CUR_CHAR(ql); + NEXTL(ql); + r = CUR_CHAR(rl); + NEXTL(rl); + cur = CUR_CHAR(l); + len = 0; + while (IS_CHAR(cur) && + ((cur != '>') || + (r != '-') || (q != '-'))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + COPY_BUF(ql,buf,len,q); + q = r; + ql = rl; + r = cur; + rl = l; + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment not terminated \n<!--%.50s\n", buf); + ctxt->wellFormed = 0; + xmlFree(buf); + } else { + NEXT; + if 
((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->comment(ctxt->userData, buf); + xmlFree(buf); + } + ctxt->instate = state; +} + +/** + * htmlParseCharRef: + * @ctxt: an HTML parser context + * + * parse Reference declarations + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * Returns the value parsed (as an int) + */ +int +htmlParseCharRef(htmlParserCtxtPtr ctxt) { + int val = 0; + + if ((CUR == '&') && (NXT(1) == '#') && + (NXT(2) == 'x')) { + SKIP(3); + while (CUR != ';') { + if ((CUR >= '0') && (CUR <= '9')) + val = val * 16 + (CUR - '0'); + else if ((CUR >= 'a') && (CUR <= 'f')) + val = val * 16 + (CUR - 'a') + 10; + else if ((CUR >= 'A') && (CUR <= 'F')) + val = val * 16 + (CUR - 'A') + 10; + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseCharRef: invalid hexadecimal value\n"); + ctxt->wellFormed = 0; + return(0); + } + NEXT; + } + if (CUR == ';') + NEXT; + } else if ((CUR == '&') && (NXT(1) == '#')) { + SKIP(2); + while (CUR != ';') { + if ((CUR >= '0') && (CUR <= '9')) + val = val * 10 + (CUR - '0'); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseCharRef: invalid decimal value\n"); + ctxt->wellFormed = 0; + return(0); + } + NEXT; + } + if (CUR == ';') + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "htmlParseCharRef: invalid value\n"); + ctxt->wellFormed = 0; + } + /* + * Check the value IS_CHAR ... 
+ */ + if (IS_CHAR(val)) { + return(val); + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "htmlParseCharRef: invalid xmlChar value %d\n", + val); + ctxt->wellFormed = 0; + } + return(0); +} + + +/** + * htmlParseDocTypeDecl: + * @ctxt: an HTML parser context + * + * parse a DOCTYPE declaration + * + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? + * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + */ + +static void +htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *ExternalID = NULL; + xmlChar *URI = NULL; + + /* + * We know that '<!DOCTYPE' has been detected. + */ + SKIP(9); + + SKIP_BLANKS; + + /* + * Parse the DOCTYPE name. + */ + name = htmlParseName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "htmlParseDocTypeDecl : no DOCTYPE name !\n"); + ctxt->wellFormed = 0; + } + /* + * Check that upper(name) == "HTML" !!!!!!!!!!!!! + */ + + SKIP_BLANKS; + + /* + * Check for SystemID and ExternalID + */ + URI = htmlParseExternalID(ctxt, &ExternalID); + SKIP_BLANKS; + + /* + * We should be at the end of the DOCTYPE declaration. + */ + if (CUR != '>') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); + ctxt->wellFormed = 0; + /* We shouldn't try to resynchronize ... 
*/ + } + NEXT; + + /* + * Create or update the document accordingly to the DOCTYPE + */ + if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); + + /* + * Cleanup, since we don't use all those identifiers + */ + if (URI != NULL) xmlFree(URI); + if (ExternalID != NULL) xmlFree(ExternalID); + if (name != NULL) xmlFree(name); +} + +/** + * htmlParseAttribute: + * @ctxt: an HTML parser context + * @value: a xmlChar ** used to store the value of the attribute + * + * parse an attribute + * + * [41] Attribute ::= Name Eq AttValue + * + * [25] Eq ::= S? '=' S? + * + * With namespace: + * + * [NS 11] Attribute ::= QName Eq AttValue + * + * Also the case QName == xmlns:??? is handled independently as a namespace + * definition. + * + * Returns the attribute name, and the value in *value. + */ + +static xmlChar * +htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { + xmlChar *name, *val = NULL; + + *value = NULL; + name = htmlParseHTMLName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); + ctxt->wellFormed = 0; + return(NULL); + } + + /* + * read the value + */ + SKIP_BLANKS; + if (CUR == '=') { + NEXT; + SKIP_BLANKS; + val = htmlParseAttValue(ctxt); + /****** + } else { + * TODO : some attribute must have values, some may not + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->warning(ctxt->userData, + "No value for attribute %s\n", name); */ + } + + *value = val; + return(name); +} + +/** + * htmlCheckEncoding: + * @ctxt: an HTML parser context + * @attvalue: the attribute value + * + * Checks an http-equiv attribute from a Meta tag to detect + * the encoding + * If a new encoding is detected the parser is switched to decode + * it and pass UTF8 + */ +static void +htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { + const 
xmlChar *encoding; + + if ((ctxt == NULL) || (attvalue == NULL)) + return; + + /* do not change encoding */ + if (ctxt->input->encoding != NULL) + return; + + encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); + if (encoding != NULL) { + encoding += 8; + } else { + encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); + if (encoding != NULL) + encoding += 9; + } + if (encoding != NULL) { + xmlCharEncoding enc; + xmlCharEncodingHandlerPtr handler; + + while ((*encoding == ' ') || (*encoding == '\t')) encoding++; + + if (ctxt->input->encoding != NULL) + xmlFree((xmlChar *) ctxt->input->encoding); + ctxt->input->encoding = xmlStrdup(encoding); + + enc = xmlParseCharEncoding((const char *) encoding); + /* + * registered set of known encodings + */ + if (enc != XML_CHAR_ENCODING_ERROR) { + xmlSwitchEncoding(ctxt, enc); + ctxt->charset = XML_CHAR_ENCODING_UTF8; + } else { + /* + * fallback for unknown encodings + */ + handler = xmlFindCharEncodingHandler((const char *) encoding); + if (handler != NULL) { + xmlSwitchToEncoding(ctxt, handler); + ctxt->charset = XML_CHAR_ENCODING_UTF8; + } else { + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + } + } + + if ((ctxt->input->buf != NULL) && + (ctxt->input->buf->encoder != NULL) && + (ctxt->input->buf->raw != NULL) && + (ctxt->input->buf->buffer != NULL)) { + int nbchars; + int processed; + + /* + * convert as much as possible to the parser reading buffer. 
+ */ + processed = ctxt->input->cur - ctxt->input->base; + xmlBufferShrink(ctxt->input->buf->buffer, processed); + nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + if (nbchars < 0) { + ctxt->errNo = XML_ERR_INVALID_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlCheckEncoding: encoder error\n"); + } + ctxt->input->base = + ctxt->input->cur = ctxt->input->buf->buffer->content; + } + } +} + +/** + * htmlCheckMeta: + * @ctxt: an HTML parser context + * @atts: the attributes values + * + * Checks an attributes from a Meta tag + */ +static void +htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { + int i; + const xmlChar *att, *value; + int http = 0; + const xmlChar *content = NULL; + + if ((ctxt == NULL) || (atts == NULL)) + return; + + i = 0; + att = atts[i++]; + while (att != NULL) { + value = atts[i++]; + if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) + && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) + http = 1; + else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) + content = value; + att = atts[i++]; + } + if ((http) && (content != NULL)) + htmlCheckEncoding(ctxt, content); + +} + +/** + * htmlParseStartTag: + * @ctxt: an HTML parser context + * + * parse a start of tag either for rule element or + * EmptyElement. In both case we don't parse the tag closing chars. + * + * [40] STag ::= '<' Name (S Attribute)* S? '>' + * + * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' + * + * With namespace: + * + * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' + * + * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? 
'/>' + * + */ + +static void +htmlParseStartTag(htmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *attname; + xmlChar *attvalue; + const xmlChar **atts = NULL; + int nbatts = 0; + int maxatts = 0; + int meta = 0; + int i; + + if (CUR != '<') return; + NEXT; + + GROW; + name = htmlParseHTMLName(ctxt); + if (name == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseStartTag: invalid element name\n"); + ctxt->wellFormed = 0; + /* Dump the bogus tag like browsers do */ + while ((IS_CHAR(CUR)) && (CUR != '>')) + NEXT; + return; + } + if (xmlStrEqual(name, BAD_CAST"meta")) + meta = 1; + + /* + * Check for auto-closure of HTML elements. + */ + htmlAutoClose(ctxt, name); + + /* + * Check for implied HTML elements. + */ + htmlCheckImplied(ctxt, name); + + /* + * Avoid html at any level > 0, head at any level != 1 + * or any attempt to recurse body + */ + if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseStartTag: misplaced <html> tag\n"); + ctxt->wellFormed = 0; + xmlFree(name); + return; + } + if ((ctxt->nameNr != 1) && + (xmlStrEqual(name, BAD_CAST"head"))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseStartTag: misplaced <head> tag\n"); + ctxt->wellFormed = 0; + xmlFree(name); + return; + } + if (xmlStrEqual(name, BAD_CAST"body")) { + int indx; + for (indx = 0;indx < ctxt->nameNr;indx++) { + if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseStartTag: misplaced <body> tag\n"); + ctxt->wellFormed = 0; + xmlFree(name); + return; + } + } + } + + /* + * Now parse the attributes, it ends up with the ending + * + * (S Attribute)* S? 
+ */ + SKIP_BLANKS; + while ((IS_CHAR(CUR)) && + (CUR != '>') && + ((CUR != '/') || (NXT(1) != '>'))) { + long cons = ctxt->nbChars; + + GROW; + attname = htmlParseAttribute(ctxt, &attvalue); + if (attname != NULL) { + + /* + * Well formedness requires at most one declaration of an attribute + */ + for (i = 0; i < nbatts;i += 2) { + if (xmlStrEqual(atts[i], attname)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attribute %s redefined\n", + attname); + ctxt->wellFormed = 0; + xmlFree(attname); + if (attvalue != NULL) + xmlFree(attvalue); + goto failed; + } + } + + /* + * Add the pair to atts + */ + if (atts == NULL) { + maxatts = 10; + atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); + if (atts == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %ld byte failed\n", + maxatts * (long)sizeof(xmlChar *)); + if (name != NULL) xmlFree(name); + return; + } + } else if (nbatts + 4 > maxatts) { + maxatts *= 2; + atts = (const xmlChar **) xmlRealloc((void *) atts, + maxatts * sizeof(xmlChar *)); + if (atts == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %ld byte failed\n", + maxatts * (long)sizeof(xmlChar *)); + if (name != NULL) xmlFree(name); + return; + } + } + atts[nbatts++] = attname; + atts[nbatts++] = attvalue; + atts[nbatts] = NULL; + atts[nbatts + 1] = NULL; + } + else { + /* Dump the bogus attribute string up to the next blank or + * the end of the tag. */ + while ((IS_CHAR(CUR)) && !(IS_BLANK(CUR)) && (CUR != '>') + && ((CUR != '/') || (NXT(1) != '>'))) + NEXT; + } + +failed: + SKIP_BLANKS; + if (cons == ctxt->nbChars) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseStartTag: problem parsing attributes\n"); + ctxt->wellFormed = 0; + break; + } + } + + /* + * Handle specific association to the META tag + */ + if (meta) + htmlCheckMeta(ctxt, atts); + + /* + * SAX: Start of Element ! 
+ */ + htmlnamePush(ctxt, xmlStrdup(name)); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, name, atts); + + if (atts != NULL) { + for (i = 0;i < nbatts;i++) { + if (atts[i] != NULL) + xmlFree((xmlChar *) atts[i]); + } + xmlFree((void *) atts); + } + if (name != NULL) xmlFree(name); +} + +/** + * htmlParseEndTag: + * @ctxt: an HTML parser context + * + * parse an end of tag + * + * [42] ETag ::= '</' Name S? '>' + * + * With namespace + * + * [NS 9] ETag ::= '</' QName S? '>' + * + * Returns 1 if the current level should be closed. + */ + +static int +htmlParseEndTag(htmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *oldname; + int i, ret; + + if ((CUR != '<') || (NXT(1) != '/')) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "htmlParseEndTag: '</' not found\n"); + ctxt->wellFormed = 0; + return(0); + } + SKIP(2); + + name = htmlParseHTMLName(ctxt); + if (name == NULL) return(0); + + /* + * We should definitely be at the ending "S? '>'" part + */ + SKIP_BLANKS; + if ((!IS_CHAR(CUR)) || (CUR != '>')) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); + ctxt->wellFormed = 0; + } else + NEXT; + + /* + * If the name read is not one of the element in the parsing stack + * then return, it's just an error. + */ + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (xmlStrEqual(name, ctxt->nameTab[i])) break; + } + if (i < 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Unexpected end tag : %s\n", name); + xmlFree(name); + ctxt->wellFormed = 0; + return(0); + } + + + /* + * Check for auto-closure of HTML elements. + */ + + htmlAutoCloseOnClose(ctxt, name); + + /* + * Well formedness constraints, opening and closing must match. 
+ * With the exception that the autoclose may have popped stuff out + * of the stack. + */ + if (!xmlStrEqual(name, ctxt->name)) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name); +#endif + if ((ctxt->name != NULL) && + (!xmlStrEqual(ctxt->name, name))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", + name, ctxt->name); + ctxt->wellFormed = 0; + } + } + + /* + * SAX: End of Tag + */ + oldname = ctxt->name; + if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname); +#endif + xmlFree(oldname); +#ifdef DEBUG + } else { + xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name); +#endif + } + ret = 1; + } else { + ret = 0; + } + + if (name != NULL) + xmlFree(name); + + return(ret); +} + + +/** + * htmlParseReference: + * @ctxt: an HTML parser context + * + * parse and handle entity references in content, + * this will end-up in a call to character() since this is either a + * CharRef, or a predefined entity. 
+ */ +static void +htmlParseReference(htmlParserCtxtPtr ctxt) { + const htmlEntityDesc * ent; + xmlChar out[6]; + xmlChar *name; + if (CUR != '&') return; + + if (NXT(1) == '#') { + unsigned int c; + int bits, i = 0; + + c = htmlParseCharRef(ctxt); + if (c == 0) + return; + + if (c < 0x80) { out[i++]= c; bits= -6; } + else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + out[i++]= ((c >> bits) & 0x3F) | 0x80; + } + out[i] = 0; + + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, out, i); + } else { + ent = htmlParseEntityRef(ctxt, &name); + if (name == NULL) { + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); + return; + } + if ((ent == NULL) || !(ent->value > 0)) { + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) { + ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); + ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name)); + /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */ + } + } else { + unsigned int c; + int bits, i = 0; + + c = ent->value; + if (c < 0x80) + { out[i++]= c; bits= -6; } + else if (c < 0x800) + { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) + { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else + { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + out[i++]= ((c >> bits) & 0x3F) | 0x80; + } + out[i] = 0; + + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, out, i); + } + xmlFree(name); + } +} + +/** + * htmlParseContent: + * @ctxt: an HTML parser context + * @name: the node name + * + * Parse 
a content: comment, sub-element, reference or text. + * + */ + +static void +htmlParseContent(htmlParserCtxtPtr ctxt) { + xmlChar *currentNode; + int depth; + + currentNode = xmlStrdup(ctxt->name); + depth = ctxt->nameNr; + while (1) { + long cons = ctxt->nbChars; + + GROW; + /* + * Our tag or one of it's parent or children is ending. + */ + if ((CUR == '<') && (NXT(1) == '/')) { + if (htmlParseEndTag(ctxt) && + ((currentNode != NULL) || (ctxt->nameNr == 0))) { + if (currentNode != NULL) + xmlFree(currentNode); + return; + } + continue; /* while */ + } + + /* + * Has this node been popped out during parsing of + * the next element + */ + if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && + (!xmlStrEqual(currentNode, ctxt->name))) + { + if (currentNode != NULL) xmlFree(currentNode); + return; + } + + if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) || + (xmlStrEqual(currentNode, BAD_CAST"style")))) { + /* + * Handle SCRIPT/STYLE separately + */ + htmlParseScript(ctxt); + } else { + /* + * Sometimes DOCTYPE arrives in the middle of the document + */ + if ((CUR == '<') && (NXT(1) == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Misplaced DOCTYPE declaration\n"); + ctxt->wellFormed = 0; + htmlParseDocTypeDecl(ctxt); + } + + /* + * First case : a comment + */ + if ((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) { + htmlParseComment(ctxt); + } + + /* + * Second case : a sub-element. + */ + else if (CUR == '<') { + htmlParseElement(ctxt); + } + + /* + * Third case : a reference. 
If if has not been resolved, + * parsing returns it's Name, create the node + */ + else if (CUR == '&') { + htmlParseReference(ctxt); + } + + /* + * Fourth : end of the resource + */ + else if (CUR == 0) { + htmlAutoCloseOnEnd(ctxt); + break; + } + + /* + * Last case, text. Note that References are handled directly. + */ + else { + htmlParseCharData(ctxt); + } + + if (cons == ctxt->nbChars) { + if (ctxt->node != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + } + break; + } + } + GROW; + } + if (currentNode != NULL) xmlFree(currentNode); +} + +/** + * htmlParseElement: + * @ctxt: an HTML parser context + * + * parse an HTML element, this is highly recursive + * + * [39] element ::= EmptyElemTag | STag content ETag + * + * [41] Attribute ::= Name Eq AttValue + */ + +void +htmlParseElement(htmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *currentNode = NULL; + const htmlElemDesc * info; + htmlParserNodeInfo node_info; + xmlChar *oldname; + int depth = ctxt->nameNr; + const xmlChar *oldptr; + + /* Capture start position */ + if (ctxt->record_info) { + node_info.begin_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.begin_line = ctxt->input->line; + } + + oldname = xmlStrdup(ctxt->name); + htmlParseStartTag(ctxt); + name = ctxt->name; +#ifdef DEBUG + if (oldname == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element %s\n", name); + else if (name == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element failed, was %s\n", oldname); + else + xmlGenericError(xmlGenericErrorContext, + "Start of element %s, was %s\n", name, oldname); +#endif + if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) || + (name == NULL)) { + if (CUR == '>') + NEXT; + if (oldname != NULL) + xmlFree(oldname); + return; + } + if (oldname != NULL) + xmlFree(oldname); + + /* + * Lookup the info for that 
element. + */ + info = htmlTagLookup(name); + if (info == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Tag %s invalid\n", + name); + ctxt->wellFormed = 0; + } else if (info->depr) { +/*************************** + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n", + name); + ***************************/ + } + + /* + * Check for an Empty Element labeled the XML/SGML way + */ + if ((CUR == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = htmlnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + return; + } + + if (CUR == '>') { + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + + /* + * end of parsing of this node. 
+ */ + if (xmlStrEqual(name, ctxt->name)) { + nodePop(ctxt); + oldname = htmlnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + } + + /* + * Capture end position and add node + */ + if ( currentNode != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ctxt->node; + xmlParserAddNodeInfo(ctxt, &node_info); + } + return; + } + + /* + * Check for an Empty Element from DTD definition + */ + if ((info != NULL) && (info->empty)) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = htmlnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + return; + } + + /* + * Parse the content of the element: + */ + currentNode = xmlStrdup(ctxt->name); + depth = ctxt->nameNr; + while (IS_CHAR(CUR)) { + oldptr = ctxt->input->cur; + htmlParseContent(ctxt); + if (oldptr==ctxt->input->cur) break; + if (ctxt->nameNr < depth) break; + } + + /* + * Capture end position and add node + */ + if ( currentNode != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ctxt->node; + xmlParserAddNodeInfo(ctxt, &node_info); + } + if (!IS_CHAR(CUR)) { + htmlAutoCloseOnEnd(ctxt); + } + + if (currentNode != NULL) + xmlFree(currentNode); +} + +/** + * htmlParseDocument: + * @ctxt: an HTML parser context + * + * parse an HTML document (and build a tree if using the standard SAX + * interface). + * + * Returns 0, -1 in case of error. the parser context is augmented + * as a result of the parsing. 
+ */ + +int +htmlParseDocument(htmlParserCtxtPtr ctxt) { + xmlDtdPtr dtd; + + xmlInitParser(); + + htmlDefaultSAXHandlerInit(); + ctxt->html = 1; + + GROW; + /* + * SAX: beginning of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + + /* + * Wipe out everything which is before the first '<' + */ + SKIP_BLANKS; + if (CUR == 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Document is empty\n"); + ctxt->wellFormed = 0; + } + + if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + + + /* + * Parse possible comments before any content + */ + while ((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) { + htmlParseComment(ctxt); + SKIP_BLANKS; + } + + + /* + * Then possibly doc type declaration(s) and more Misc + * (doctypedecl Misc*)? + */ + if ((CUR == '<') && (NXT(1) == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + htmlParseDocTypeDecl(ctxt); + } + SKIP_BLANKS; + + /* + * Parse possible comments before any content + */ + while ((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) { + htmlParseComment(ctxt); + SKIP_BLANKS; + } + + /* + * Time to start parsing the tree itself + */ + htmlParseContent(ctxt); + + /* + * autoclose + */ + if (CUR == 0) + htmlAutoCloseOnEnd(ctxt); + + + /* + * SAX: end of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + + if (ctxt->myDoc != NULL) { + dtd = xmlGetIntSubset(ctxt->myDoc); + if (dtd == NULL) + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "HTML", + BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", + BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); + } + if (! 
ctxt->wellFormed) return(-1); + return(0); +} + + +/************************************************************************ + * * + * Parser contexts handling * + * * + ************************************************************************/ + +/** + * xmlInitParserCtxt: + * @ctxt: an HTML parser context + * + * Initialize a parser context + */ + +static void +htmlInitParserCtxt(htmlParserCtxtPtr ctxt) +{ + htmlSAXHandler *sax; + + if (ctxt == NULL) return; + memset(ctxt, 0, sizeof(htmlParserCtxt)); + + sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); + if (sax == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlInitParserCtxt: out of memory\n"); + } + else + memset(sax, 0, sizeof(htmlSAXHandler)); + + /* Allocate the Input stack */ + ctxt->inputTab = (htmlParserInputPtr *) + xmlMalloc(5 * sizeof(htmlParserInputPtr)); + if (ctxt->inputTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlInitParserCtxt: out of memory\n"); + ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + return; + } + ctxt->inputNr = 0; + ctxt->inputMax = 5; + ctxt->input = NULL; + ctxt->version = NULL; + ctxt->encoding = NULL; + ctxt->standalone = -1; + ctxt->instate = XML_PARSER_START; + + /* Allocate the Node stack */ + ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr)); + if (ctxt->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlInitParserCtxt: out of memory\n"); + ctxt->nodeNr = 0; + ctxt->nodeMax = 0; + ctxt->node = NULL; + ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + return; + } + ctxt->nodeNr = 0; + ctxt->nodeMax = 10; + ctxt->node = NULL; + + /* Allocate the Name stack */ + ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); + if (ctxt->nameTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlInitParserCtxt: out of memory\n"); + ctxt->nameNr = 0; + ctxt->nameMax = 10; + ctxt->name = NULL; + ctxt->nodeNr = 0; + ctxt->nodeMax = 0; + ctxt->node = NULL; + 
ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + return; + } + ctxt->nameNr = 0; + ctxt->nameMax = 10; + ctxt->name = NULL; + + if (sax == NULL) ctxt->sax = &htmlDefaultSAXHandler; + else { + ctxt->sax = sax; + memcpy(sax, &htmlDefaultSAXHandler, sizeof(htmlSAXHandler)); + } + ctxt->userData = ctxt; + ctxt->myDoc = NULL; + ctxt->wellFormed = 1; + ctxt->replaceEntities = 0; + ctxt->linenumbers = xmlLineNumbersDefaultValue; + ctxt->html = 1; + ctxt->record_info = 0; + ctxt->validate = 0; + ctxt->nbChars = 0; + ctxt->checkIndex = 0; + ctxt->catalogs = NULL; + xmlInitNodeInfoSeq(&ctxt->node_seq); +} + +/** + * htmlFreeParserCtxt: + * @ctxt: an HTML parser context + * + * Free all the memory used by a parser context. However the parsed + * document in ctxt->myDoc is not freed. + */ + +void +htmlFreeParserCtxt(htmlParserCtxtPtr ctxt) +{ + xmlFreeParserCtxt(ctxt); +} + +/** + * htmlNewParserCtxt: + * + * Allocate and initialize a new parser context. + * + * Returns the xmlParserCtxtPtr or NULL + */ + +static htmlParserCtxtPtr +htmlNewParserCtxt(void) +{ + xmlParserCtxtPtr ctxt; + + ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewParserCtxt : cannot allocate context\n"); + return(NULL); + } + memset(ctxt, 0, sizeof(xmlParserCtxt)); + htmlInitParserCtxt(ctxt); + return(ctxt); +} + +/** + * htmlCreateMemoryParserCtxt: + * @buffer: a pointer to a char array + * @size: the size of the array + * + * Create a parser context for an HTML in-memory document. 
+ * + * Returns the new parser context or NULL + */ +static htmlParserCtxtPtr +htmlCreateMemoryParserCtxt(const char *buffer, int size) { + xmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + xmlParserInputBufferPtr buf; + + if (buffer == NULL) + return(NULL); + if (size <= 0) + return(NULL); + + ctxt = htmlNewParserCtxt(); + if (ctxt == NULL) + return(NULL); + + buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); + if (buf == NULL) return(NULL); + + input = xmlNewInputStream(ctxt); + if (input == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + input->filename = NULL; + input->buf = buf; + input->base = input->buf->buffer->content; + input->cur = input->buf->buffer->content; + input->end = &input->buf->buffer->content[input->buf->buffer->use]; + + inputPush(ctxt, input); + return(ctxt); +} + +/** + * htmlCreateDocParserCtxt: + * @cur: a pointer to an array of xmlChar + * @encoding: a free form C string describing the HTML document encoding, or NULL + * + * Create a parser context for an HTML document. + * + * TODO: check the need to add encoding handling there + * + * Returns the new parser context or NULL + */ +static htmlParserCtxtPtr +htmlCreateDocParserCtxt(xmlChar *cur, const char *encoding ATTRIBUTE_UNUSED) { + int len; + + if (cur == NULL) + return(NULL); + len = xmlStrlen(cur); + return(htmlCreateMemoryParserCtxt((char *)cur, len)); +} + +/************************************************************************ + * * + * Progressive parsing interfaces * + * * + ************************************************************************/ + +/** + * htmlParseLookupSequence: + * @ctxt: an HTML parser context + * @first: the first char to lookup + * @next: the next char to lookup or zero + * @third: the next char to lookup or zero + * + * Try to find if a sequence (first, next, third) or just (first next) or + * (first) is available in the input stream. 
+ * This function has a side effect of (possibly) incrementing ctxt->checkIndex + * to avoid rescanning sequences of bytes, it DOES change the state of the + * parser, do not use liberally. + * This is basically similar to xmlParseLookupSequence() + * + * Returns the index to the current parsing point if the full sequence + * is available, -1 otherwise. + */ +static int +htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, + xmlChar next, xmlChar third) { + int base, len; + htmlParserInputPtr in; + const xmlChar *buf; + int incomment = 0; + + in = ctxt->input; + if (in == NULL) return(-1); + base = in->cur - in->base; + if (base < 0) return(-1); + if (ctxt->checkIndex > base) + base = ctxt->checkIndex; + if (in->buf == NULL) { + buf = in->base; + len = in->length; + } else { + buf = in->buf->buffer->content; + len = in->buf->buffer->use; + } + /* take into account the sequence length */ + if (third) len -= 2; + else if (next) len --; + for (;base < len;base++) { + if (!incomment && (base + 4 < len)) { + if ((buf[base] == '<') && (buf[base + 1] == '!') && + (buf[base + 2] == '-') && (buf[base + 3] == '-')) { + incomment = 1; + } + /* do not increment base, some people use <!--> */ + } + if (incomment) { + if (base + 3 < len) + return(-1); + if ((buf[base] == '-') && (buf[base + 1] == '-') && + (buf[base + 2] == '>')) { + incomment = 0; + base += 2; + } + continue; + } + if (buf[base] == first) { + if (third != 0) { + if ((buf[base + 1] != next) || + (buf[base + 2] != third)) continue; + } else if (next != 0) { + if (buf[base + 1] != next) continue; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c' found at %d\n", + first, base); + else if (third == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c' found at %d\n", + first, next, base); + else + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c%c' found at %d\n", + first, next, third, base); +#endif + 
return(base - (in->cur - in->base)); + } + } + ctxt->checkIndex = base; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c' failed\n", first); + else if (third == 0) + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c' failed\n", first, next); + else + xmlGenericError(xmlGenericErrorContext, + "HPP: lookup '%c%c%c' failed\n", first, next, third); +#endif + return(-1); +} + +/** + * htmlParseTryOrFinish: + * @ctxt: an HTML parser context + * @terminate: last chunk indicator + * + * Try to progress on parsing + * + * Returns zero if no parsing was possible + */ +static int +htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + int ret = 0; + htmlParserInputPtr in; + int avail = 0; + xmlChar cur, next; + +#ifdef DEBUG_PUSH + switch (ctxt->instate) { + case XML_PARSER_EOF: + xmlGenericError(xmlGenericErrorContext, + "HPP: try EOF\n"); break; + case XML_PARSER_START: + xmlGenericError(xmlGenericErrorContext, + "HPP: try START\n"); break; + case XML_PARSER_MISC: + xmlGenericError(xmlGenericErrorContext, + "HPP: try MISC\n");break; + case XML_PARSER_COMMENT: + xmlGenericError(xmlGenericErrorContext, + "HPP: try COMMENT\n");break; + case XML_PARSER_PROLOG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try PROLOG\n");break; + case XML_PARSER_START_TAG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try START_TAG\n");break; + case XML_PARSER_CONTENT: + xmlGenericError(xmlGenericErrorContext, + "HPP: try CONTENT\n");break; + case XML_PARSER_CDATA_SECTION: + xmlGenericError(xmlGenericErrorContext, + "HPP: try CDATA_SECTION\n");break; + case XML_PARSER_END_TAG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try END_TAG\n");break; + case XML_PARSER_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, + "HPP: try ENTITY_DECL\n");break; + case XML_PARSER_ENTITY_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: try ENTITY_VALUE\n");break; + case XML_PARSER_ATTRIBUTE_VALUE: + 
xmlGenericError(xmlGenericErrorContext, + "HPP: try ATTRIBUTE_VALUE\n");break; + case XML_PARSER_DTD: + xmlGenericError(xmlGenericErrorContext, + "HPP: try DTD\n");break; + case XML_PARSER_EPILOG: + xmlGenericError(xmlGenericErrorContext, + "HPP: try EPILOG\n");break; + case XML_PARSER_PI: + xmlGenericError(xmlGenericErrorContext, + "HPP: try PI\n");break; + case XML_PARSER_SYSTEM_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "HPP: try SYSTEM_LITERAL\n");break; + } +#endif + + while (1) { + + in = ctxt->input; + if (in == NULL) break; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if ((avail == 0) && (terminate)) { + htmlAutoCloseOnEnd(ctxt); + if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { + /* + * SAX: end of the document processing. + */ + ctxt->instate = XML_PARSER_EOF; + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + } + if (avail < 1) + goto done; + switch (ctxt->instate) { + case XML_PARSER_EOF: + /* + * Document parsing is done ! + */ + goto done; + case XML_PARSER_START: + /* + * Very first chars read from the document flow. 
+ */ + cur = in->cur[0]; + if (IS_BLANK(cur)) { + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + } + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + if ((ctxt->sax) && (ctxt->sax->startDocument) && + (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing internal subset\n"); +#endif + htmlParseDocTypeDecl(ctxt); + ctxt->instate = XML_PARSER_PROLOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering PROLOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_MISC; + } +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering MISC\n"); +#endif + break; + case XML_PARSER_MISC: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + htmlParseComment(ctxt); + ctxt->instate = XML_PARSER_MISC; + } else if ((cur == '<') && (next == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 
0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing internal subset\n"); +#endif + htmlParseDocTypeDecl(ctxt); + ctxt->instate = XML_PARSER_PROLOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering PROLOG\n"); +#endif + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + } + break; + case XML_PARSER_PROLOG: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + htmlParseComment(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + } + break; + case XML_PARSER_EPILOG: + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 1) + goto done; + cur = in->cur[0]; + if (IS_BLANK(cur)) { + htmlParseCharData(ctxt); + goto done; + } + if (avail < 2) + goto done; + next = in->cur[1]; + if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + htmlParseComment(ctxt); + ctxt->instate = 
XML_PARSER_EPILOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->errNo = XML_ERR_DOCUMENT_END; + ctxt->wellFormed = 0; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + break; + case XML_PARSER_START_TAG: { + xmlChar *name, *oldname; + int depth = ctxt->nameNr; + const htmlElemDesc * info; + + if (avail < 2) + goto done; + cur = in->cur[0]; + if (cur != '<') { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + if (in->cur[1] == '/') { + ctxt->instate = XML_PARSER_END_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering END_TAG\n"); +#endif + break; + } + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + + oldname = xmlStrdup(ctxt->name); + htmlParseStartTag(ctxt); + name = ctxt->name; +#ifdef DEBUG + if (oldname == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element %s\n", name); + else if (name == NULL) + xmlGenericError(xmlGenericErrorContext, + "Start of element failed, was %s\n", + oldname); + else + xmlGenericError(xmlGenericErrorContext, + "Start of element %s, was %s\n", + name, oldname); +#endif + if (((depth == ctxt->nameNr) && + (xmlStrEqual(oldname, ctxt->name))) || + (name == NULL)) { + if (CUR == '>') + NEXT; + if (oldname != NULL) + xmlFree(oldname); + break; + } + if (oldname != NULL) + xmlFree(oldname); + + /* + * Lookup the info for that element. 
+ */ + info = htmlTagLookup(name); + if (info == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Tag %s invalid\n", + name); + ctxt->wellFormed = 0; + } else if (info->depr) { + /*************************** + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Tag %s is deprecated\n", + name); + ***************************/ + } + + /* + * Check for an Empty Element labeled the XML/SGML way + */ + if ((CUR == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = htmlnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", + oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + + if (CUR == '>') { + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + + /* + * end of parsing of this node. 
+ */ + if (xmlStrEqual(name, ctxt->name)) { + nodePop(ctxt); + oldname = htmlnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext, + "End of start tag problem: popping out %s\n", oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + } + + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + + /* + * Check for an Empty Element from DTD definition + */ + if ((info != NULL) && (info->empty)) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = htmlnamePop(ctxt); +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname); +#endif + if (oldname != NULL) + xmlFree(oldname); + } + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + } + case XML_PARSER_CONTENT: { + long cons; + /* + * Handle preparsed entities and charRef + */ + if (ctxt->token != 0) { + xmlChar chr[2] = { 0 , 0 } ; + + chr[0] = (xmlChar) ctxt->token; + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, chr, 1); + ctxt->token = 0; + ctxt->checkIndex = 0; + } + if ((avail == 1) && (terminate)) { + cur = in->cur[0]; + if ((cur != '<') && (cur != '&')) { + if (ctxt->sax != NULL) { + if (IS_BLANK(cur)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace( + ctxt->userData, &cur, 1); + } else { + htmlCheckParagraph(ctxt); + if (ctxt->sax->characters != NULL) + ctxt->sax->characters( + ctxt->userData, &cur, 1); + } + } + ctxt->token = 0; + ctxt->checkIndex = 0; + in->cur++; + break; + } + } + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + cons = ctxt->nbChars; + if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || + (xmlStrEqual(ctxt->name, 
BAD_CAST"style"))) { + /* + * Handle SCRIPT/STYLE separately + */ + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '<', '/', 0) < 0)) + goto done; + htmlParseScript(ctxt); + if ((cur == '<') && (next == '/')) { + ctxt->instate = XML_PARSER_END_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering END_TAG\n"); +#endif + break; + } + } else { + /* + * Sometimes DOCTYPE arrives in the middle of the document + */ + if ((cur == '<') && (next == '!') && + (UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Misplaced DOCTYPE declaration\n"); + ctxt->wellFormed = 0; + htmlParseDocTypeDecl(ctxt); + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Comment\n"); +#endif + htmlParseComment(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } else if ((cur == '<') && (next == '!') && (avail < 4)) { + goto done; + } else if ((cur == '<') && (next == '/')) { + ctxt->instate = XML_PARSER_END_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering END_TAG\n"); +#endif + break; + } else if (cur == '<') { + ctxt->instate = XML_PARSER_START_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + break; + } else if (cur == '&') { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing Reference\n"); +#endif + /* TODO: check generation of 
subtrees if noent !!! */ + htmlParseReference(ctxt); + } else { + /* TODO Avoid the extra copy, handle directly !!!!!! */ + /* + * Goal of the following test is: + * - minimize calls to the SAX 'character' callback + * when they are mergeable + */ + if ((ctxt->inputNr == 1) && + (avail < HTML_PARSER_BIG_BUFFER_SIZE)) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) + goto done; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing char data\n"); +#endif + htmlParseCharData(ctxt); + } + } + if (cons == ctxt->nbChars) { + if (ctxt->node != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + } + NEXT; + break; + } + + break; + } + case XML_PARSER_END_TAG: + if (avail < 2) + goto done; + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + htmlParseEndTag(ctxt); + if (ctxt->nameNr == 0) { + ctxt->instate = XML_PARSER_EPILOG; + } else { + ctxt->instate = XML_PARSER_CONTENT; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_CDATA_SECTION: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == CDATA\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_DTD: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == DTD\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_COMMENT: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == COMMENT\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef 
DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_PI: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == PI\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == ENTITY_DECL\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_ENTITY_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == ENTITY_VALUE\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering DTD\n"); +#endif + break; + case XML_PARSER_ATTRIBUTE_VALUE: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == ATTRIBUTE_VALUE\n"); + ctxt->instate = XML_PARSER_START_TAG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); +#endif + break; + case XML_PARSER_SYSTEM_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_IGNORE: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == XML_PARSER_IGNORE\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_PUBLIC_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "HPP: internal error, state == 
XML_PARSER_LITERAL\n"); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: entering CONTENT\n"); +#endif + break; + + } + } +done: + if ((avail == 0) && (terminate)) { + htmlAutoCloseOnEnd(ctxt); + if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { + /* + * SAX: end of the document processing. + */ + ctxt->instate = XML_PARSER_EOF; + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + } + if ((ctxt->myDoc != NULL) && + ((terminate) || (ctxt->instate == XML_PARSER_EOF) || + (ctxt->instate == XML_PARSER_EPILOG))) { + xmlDtdPtr dtd; + dtd = xmlGetIntSubset(ctxt->myDoc); + if (dtd == NULL) + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "HTML", + BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", + BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); + } +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); +#endif + return(ret); +} + +/** + * htmlParseChunk: + * @ctxt: an XML parser context + * @chunk: an char array + * @size: the size in byte of the chunk + * @terminate: last chunk indicator + * + * Parse a Chunk of memory + * + * Returns zero if no error, the xmlParserErrors otherwise. 
+ */ +int +htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, + int terminate) { + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; + + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + cur; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); +#endif + + if ((terminate) || (ctxt->input->buf->buffer->use > 80)) + htmlParseTryOrFinish(ctxt, terminate); + } else if (ctxt->instate != XML_PARSER_EOF) { + xmlParserInputBufferPush(ctxt->input->buf, 0, ""); + htmlParseTryOrFinish(ctxt, terminate); + } + if (terminate) { + if ((ctxt->instate != XML_PARSER_EOF) && + (ctxt->instate != XML_PARSER_EPILOG) && + (ctxt->instate != XML_PARSER_MISC)) { + ctxt->errNo = XML_ERR_DOCUMENT_END; + ctxt->wellFormed = 0; + } + if (ctxt->instate != XML_PARSER_EOF) { + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + ctxt->instate = XML_PARSER_EOF; + } + return((xmlParserErrors) ctxt->errNo); +} + +/************************************************************************ + * * + * User entry points * + * * + ************************************************************************/ + +/** + * htmlCreatePushParserCtxt: + * @sax: a SAX handler + * @user_data: The user data returned on SAX callbacks + * @chunk: a pointer to an array of chars + * @size: number of chars in the array + * @filename: an optional file name or URI + * @enc: an optional encoding + * + * Create a parser context for using the HTML parser in push mode + * To allow content encoding detection, @size should be >= 4 + * The value of @filename is used for fetching external entities + * and error/warning reports. 
+ * + * Returns the new parser context or NULL + */ +htmlParserCtxtPtr +htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, + const char *chunk, int size, const char *filename, + xmlCharEncoding enc) { + htmlParserCtxtPtr ctxt; + htmlParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + + xmlInitParser(); + + buf = xmlAllocParserInputBuffer(enc); + if (buf == NULL) return(NULL); + + ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt)); + if (ctxt == NULL) { + xmlFree(buf); + return(NULL); + } + memset(ctxt, 0, sizeof(htmlParserCtxt)); + htmlInitParserCtxt(ctxt); + if (sax != NULL) { + if (ctxt->sax != &htmlDefaultSAXHandler) + xmlFree(ctxt->sax); + ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); + if (ctxt->sax == NULL) { + xmlFree(buf); + xmlFree(ctxt); + return(NULL); + } + memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); + if (user_data != NULL) + ctxt->userData = user_data; + } + if (filename == NULL) { + ctxt->directory = NULL; + } else { + ctxt->directory = xmlParserGetDirectory(filename); + } + + inputStream = htmlNewInputStream(ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + if (filename == NULL) + inputStream->filename = NULL; + else + inputStream->filename = xmlMemStrdup(filename); + inputStream->buf = buf; + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + + inputPush(ctxt, inputStream); + + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL)) { + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); +#endif + } + + return(ctxt); +} + +/** + * htmlSAXParseDoc: + * @cur: a pointer to an array of xmlChar + * @encoding: a free form C string describing the HTML document encoding, or NULL + * @sax: the SAX handler block + * @userData: if using SAX, this pointer will be provided on 
callbacks. + * + * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks + * to handle parse events. If sax is NULL, fallback to the default DOM + * behavior and return a tree. + * + * Returns the resulting document tree unless SAX is NULL or the document is + * not well formed. + */ + +htmlDocPtr +htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) { + htmlDocPtr ret; + htmlParserCtxtPtr ctxt; + + xmlInitParser(); + + if (cur == NULL) return(NULL); + + + ctxt = htmlCreateDocParserCtxt(cur, encoding); + if (ctxt == NULL) return(NULL); + if (sax != NULL) { + ctxt->sax = sax; + ctxt->userData = userData; + } + + htmlParseDocument(ctxt); + ret = ctxt->myDoc; + if (sax != NULL) { + ctxt->sax = NULL; + ctxt->userData = NULL; + } + htmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * htmlParseDoc: + * @cur: a pointer to an array of xmlChar + * @encoding: a free form C string describing the HTML document encoding, or NULL + * + * parse an HTML in-memory document and build a tree. + * + * Returns the resulting document tree + */ + +htmlDocPtr +htmlParseDoc(xmlChar *cur, const char *encoding) { + return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); +} + + +/** + * htmlCreateFileParserCtxt: + * @filename: the filename + * @encoding: a free form C string describing the HTML document encoding, or NULL + * + * Create a parser context for a file content. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. 
+ * + * Returns the new parser context or NULL + */ +htmlParserCtxtPtr +htmlCreateFileParserCtxt(const char *filename, const char *encoding) +{ + htmlParserCtxtPtr ctxt; + htmlParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + /* htmlCharEncoding enc; */ + xmlChar *content, *content_line = (xmlChar *) "charset="; + + buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); + if (buf == NULL) return(NULL); + + ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt)); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed\n"); + return(NULL); + } + memset(ctxt, 0, sizeof(htmlParserCtxt)); + htmlInitParserCtxt(ctxt); + inputStream = (htmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput)); + if (inputStream == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed\n"); + xmlFree(ctxt); + return(NULL); + } + memset(inputStream, 0, sizeof(htmlParserInput)); + + inputStream->filename = (char *) + xmlNormalizeWindowsPath((xmlChar *)filename); + inputStream->line = 1; + inputStream->col = 1; + inputStream->buf = buf; + inputStream->directory = NULL; + + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->free = NULL; + + inputPush(ctxt, inputStream); + + /* set encoding */ + if (encoding) { + content = xmlMalloc (xmlStrlen(content_line) + strlen(encoding) + 1); + if (content) { + strcpy ((char *)content, (char *)content_line); + strcat ((char *)content, (char *)encoding); + htmlCheckEncoding (ctxt, content); + xmlFree (content); + } + } + + return(ctxt); +} + +/** + * htmlSAXParseFile: + * @filename: the filename + * @encoding: a free form C string describing the HTML document encoding, or NULL + * @sax: the SAX handler block + * @userData: if using SAX, this pointer will be provided on callbacks. + * + * parse an HTML file and build a tree. 
Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * It use the given SAX function block to handle the parsing callback. + * If sax is NULL, fallback to the default DOM tree building routines. + * + * Returns the resulting document tree unless SAX is NULL or the document is + * not well formed. + */ + +htmlDocPtr +htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax, + void *userData) { + htmlDocPtr ret; + htmlParserCtxtPtr ctxt; + htmlSAXHandlerPtr oldsax = NULL; + + xmlInitParser(); + + ctxt = htmlCreateFileParserCtxt(filename, encoding); + if (ctxt == NULL) return(NULL); + if (sax != NULL) { + oldsax = ctxt->sax; + ctxt->sax = sax; + ctxt->userData = userData; + } + + htmlParseDocument(ctxt); + + ret = ctxt->myDoc; + if (sax != NULL) { + ctxt->sax = oldsax; + ctxt->userData = NULL; + } + htmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * htmlParseFile: + * @filename: the filename + * @encoding: a free form C string describing the HTML document encoding, or NULL + * + * parse an HTML file and build a tree. Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * + * Returns the resulting document tree + */ + +htmlDocPtr +htmlParseFile(const char *filename, const char *encoding) { + return(htmlSAXParseFile(filename, encoding, NULL, NULL)); +} + +/** + * htmlHandleOmittedElem: + * @val: int 0 or 1 + * + * Set and return the previous value for handling HTML omitted tags. + * + * Returns the last value for 0 for no handling, 1 for auto insertion. 
+ */ + +int +htmlHandleOmittedElem(int val) { + int old = htmlOmittedDefaultValue; + + htmlOmittedDefaultValue = val; + return(old); +} + +#endif /* LIBXML_HTML_ENABLED */ diff --git a/bundle/libxml/HTMLtree.c b/bundle/libxml/HTMLtree.c new file mode 100644 index 0000000000..fe69b3dee9 --- /dev/null +++ b/bundle/libxml/HTMLtree.c @@ -0,0 +1,1099 @@ +/* + * HTMLtree.c : implementation of access function for an HTML tree. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + + +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_HTML_ENABLED + +#include <string.h> /* for memset() only ! */ + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/HTMLparser.h> +#include <libxml/HTMLtree.h> +#include <libxml/entities.h> +#include <libxml/valid.h> +#include <libxml/xmlerror.h> +#include <libxml/parserInternals.h> +#include <libxml/globals.h> +#include <libxml/uri.h> + +/************************************************************************ + * * + * Getting/Setting encoding meta tags * + * * + ************************************************************************/ + +/** + * htmlGetMetaEncoding: + * @doc: the document + * + * Encoding definition lookup in the Meta tags + * + * Returns the current encoding as flagged in the HTML source + */ +const xmlChar * +htmlGetMetaEncoding(htmlDocPtr doc) { + htmlNodePtr cur; + const xmlChar *content; + const xmlChar *encoding; + + if (doc == NULL) + return(NULL); + cur = doc->children; + + /* + * Search the html + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrEqual(cur->name, BAD_CAST"html")) + break; + if (xmlStrEqual(cur->name, BAD_CAST"head")) + goto found_head; + if (xmlStrEqual(cur->name, BAD_CAST"meta")) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(NULL); + cur = cur->children; + + /* + * Search the head + 
*/ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrEqual(cur->name, BAD_CAST"head")) + break; + if (xmlStrEqual(cur->name, BAD_CAST"meta")) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(NULL); +found_head: + cur = cur->children; + + /* + * Search the meta elements + */ +found_meta: + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrEqual(cur->name, BAD_CAST"meta")) { + xmlAttrPtr attr = cur->properties; + int http; + const xmlChar *value; + + content = NULL; + http = 0; + while (attr != NULL) { + if ((attr->children != NULL) && + (attr->children->type == XML_TEXT_NODE) && + (attr->children->next == NULL)) { + value = attr->children->content; + if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) + && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) + http = 1; + else if ((value != NULL) + && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) + content = value; + if ((http != 0) && (content != NULL)) + goto found_content; + } + attr = attr->next; + } + } + } + cur = cur->next; + } + return(NULL); + +found_content: + encoding = xmlStrstr(content, BAD_CAST"charset="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"Charset="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"CHARSET="); + if (encoding != NULL) { + encoding += 8; + } else { + encoding = xmlStrstr(content, BAD_CAST"charset ="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"Charset ="); + if (encoding == NULL) + encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); + if (encoding != NULL) + encoding += 9; + } + if (encoding != NULL) { + while ((*encoding == ' ') || (*encoding == '\t')) encoding++; + } + return(encoding); +} + +/** + * htmlSetMetaEncoding: + * @doc: the document + * @encoding: the encoding string + * + * Sets the current encoding in the Meta tags + * NOTE: this will not change the document content encoding, just 
+ * the META flag associated. + * + * Returns 0 in case of success and -1 in case of error + */ +int +htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { + htmlNodePtr cur, meta; + const xmlChar *content; + char newcontent[100]; + + + if (doc == NULL) + return(-1); + + if (encoding != NULL) { + snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", + encoding); + newcontent[sizeof(newcontent) - 1] = 0; + } + + cur = doc->children; + + /* + * Search the html + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) + break; + if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) + goto found_head; + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(-1); + cur = cur->children; + + /* + * Search the head + */ + while (cur != NULL) { + if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) + break; + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) + goto found_meta; + } + cur = cur->next; + } + if (cur == NULL) + return(-1); +found_head: + if (cur->children == NULL) { + if (encoding == NULL) + return(0); + meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); + xmlAddChild(cur, meta); + xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); + xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); + return(0); + } + cur = cur->children; + +found_meta: + if (encoding != NULL) { + /* + * Create a new Meta element with the right attributes + */ + + meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); + xmlAddPrevSibling(cur, meta); + xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); + xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); + } + + /* + * Search and destroy all the remaining the meta elements carrying + * encoding informations + */ + while (cur != NULL) { + if ((cur->type == 
XML_ELEMENT_NODE) && (cur->name != NULL)) { + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { + xmlAttrPtr attr = cur->properties; + int http; + const xmlChar *value; + + content = NULL; + http = 0; + while (attr != NULL) { + if ((attr->children != NULL) && + (attr->children->type == XML_TEXT_NODE) && + (attr->children->next == NULL)) { + value = attr->children->content; + if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) + && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) + http = 1; + else + { + if ((value != NULL) && + (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) + content = value; + } + if ((http != 0) && (content != NULL)) + break; + } + attr = attr->next; + } + if ((http != 0) && (content != NULL)) { + meta = cur; + cur = cur->next; + xmlUnlinkNode(meta); + xmlFreeNode(meta); + continue; + } + + } + } + cur = cur->next; + } + return(0); +} + +/** + * booleanHTMLAttrs: + * + * These are the HTML attributes which will be output + * in minimized form, i.e. <option selected="selected"> will be + * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method" + * + */ +static const char* htmlBooleanAttrs[] = { + "checked", "compact", "declare", "defer", "disabled", "ismap", + "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", + "selected", NULL +}; + + +/** + * htmlIsBooleanAttr: + * @name: the name of the attribute to check + * + * Determine if a given attribute is a boolean attribute. + * + * returns: false if the attribute is not boolean, true otherwise. 
+ */ +int +htmlIsBooleanAttr(const xmlChar *name) +{ + int i = 0; + + while (htmlBooleanAttrs[i] != NULL) { + if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) + return 1; + i++; + } + return 0; +} + +/************************************************************************ + * * + * Dumping HTML tree content to a simple buffer * + * * + ************************************************************************/ + +static int +htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int format); + +/** + * htmlNodeDumpFormat: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. + * + * Returns the number of byte written or -1 in case of error + */ +static int +htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int format) { + unsigned int use; + int ret; + xmlOutputBufferPtr outbuf; + + if (cur == NULL) { + return (-1); + } + if (buf == NULL) { + return (-1); + } + outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); + if (outbuf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNodeDumpFormat: out of memory!\n"); + return (-1); + } + memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); + outbuf->buffer = buf; + outbuf->encoder = NULL; + outbuf->writecallback = NULL; + outbuf->closecallback = NULL; + outbuf->context = NULL; + outbuf->written = 0; + + use = buf->use; + htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); + xmlFree(outbuf); + ret = buf->use - use; + return (ret); +} + +/** + * htmlNodeDump: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns are added. 
+ * + * Returns the number of byte written or -1 in case of error + */ +int +htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { + return(htmlNodeDumpFormat(buf, doc, cur, 1)); +} + +/** + * htmlNodeDumpFileFormat: + * @out: the FILE pointer + * @doc: the document + * @cur: the current node + * @encoding: the document encoding + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. + * + * TODO: if encoding == NULL try to save in the doc encoding + * + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + int ret; + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != XML_CHAR_ENCODING_UTF8) { + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + return(-1); + } + } + + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFile(out, handler); + if (buf == NULL) return(0); + + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlNodeDumpFile: + * @out: the FILE pointer + * @doc: the document + * @cur: the current node + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns are added. 
+ */ +void +htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { + htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); +} + +/** + * htmlDocDumpMemory: + * @cur: the document + * @mem: OUT: the memory pointer + * @size: OUT: the memory length + * + * Dump an HTML document in memory and return the xmlChar * and it's size. + * It's up to the caller to free the memory. + */ +void +htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + const char *encoding; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "htmlDocDumpMemory : document == NULL\n"); +#endif + *mem = NULL; + *size = 0; + return; + } + + encoding = (const char *) htmlGetMetaEncoding(cur); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != cur->charset) { + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + /* + * Not supported yet + */ + *mem = NULL; + *size = 0; + return; + } + + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) { + *mem = NULL; + *size = 0; + return; + } + } + } + + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + + buf = xmlAllocOutputBuffer(handler); + if (buf == NULL) { + *mem = NULL; + *size = 0; + return; + } + + htmlDocContentDumpOutput(buf, cur, NULL); + xmlOutputBufferFlush(buf); + if (buf->conv != NULL) { + *size = buf->conv->use; + *mem = xmlStrndup(buf->conv->content, *size); + } else { + *size = buf->buffer->use; + *mem = xmlStrndup(buf->buffer->content, *size); + } + (void)xmlOutputBufferClose(buf); +} + + +/************************************************************************ + * * + * Dumping HTML tree content to an I/O output buffer * + * * + ************************************************************************/ + 
+ +/** + * htmlDtdDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @encoding: the encoding string + * + * TODO: check whether encoding is needed + * + * Dump the HTML document DTD, if any. + */ +static void +htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + const char *encoding ATTRIBUTE_UNUSED) { + xmlDtdPtr cur = doc->intSubset; + + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlDtdDumpOutput : no internal subset\n"); + return; + } + xmlOutputBufferWriteString(buf, "<!DOCTYPE "); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->ExternalID != NULL) { + xmlOutputBufferWriteString(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); + if (cur->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + } + } else if (cur->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + } + xmlOutputBufferWriteString(buf, ">\n"); +} + +/** + * htmlAttrDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the attribute pointer + * @encoding: the encoding string + * + * Dump an HTML attribute + */ +static void +htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, + const char *encoding ATTRIBUTE_UNUSED) { + xmlChar *value; + + /* + * TODO: The html output method should not escape a & character + * occurring in an attribute value immediately followed by + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). 
+ */ + + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlAttrDumpOutput : property == NULL\n"); + return; + } + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { + value = xmlNodeListGetString(doc, cur->children, 0); + if (value) { + xmlOutputBufferWriteString(buf, "="); + if ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || + (!xmlStrcasecmp(cur->name, BAD_CAST "src"))) { + xmlChar *escaped; + xmlChar *tmp = value; + + while (IS_BLANK(*tmp)) tmp++; + + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&"); + if (escaped != NULL) { + xmlBufferWriteQuotedString(buf->buffer, escaped); + xmlFree(escaped); + } else { + xmlBufferWriteQuotedString(buf->buffer, value); + } + } else { + xmlBufferWriteQuotedString(buf->buffer, value); + } + xmlFree(value); + } else { + xmlOutputBufferWriteString(buf, "=\"\""); + } + } +} + +/** + * htmlAttrListDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the first attribute pointer + * @encoding: the encoding string + * + * Dump a list of HTML attributes + */ +static void +htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlAttrListDumpOutput : property == NULL\n"); + return; + } + while (cur != NULL) { + htmlAttrDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } +} + + + +/** + * htmlNodeListDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the first node + * @encoding: the encoding string + * @format: should formatting spaces been added + * + * Dump an HTML node list, recursive behaviour,children are printed too. 
+ */ +static void +htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNodeListDumpOutput : node == NULL\n"); + return; + } + while (cur != NULL) { + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + cur = cur->next; + } +} + +/** + * htmlNodeDumpFormatOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * @encoding: the encoding string + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. + */ +void +htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { + const htmlElemDesc * info; + + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNodeDumpFormatOutput : node == NULL\n"); + return; + } + /* + * Special cases. + */ + if (cur->type == XML_DTD_NODE) + return; + if (cur->type == XML_HTML_DOCUMENT_NODE) { + htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); + return; + } + if (cur->type == HTML_TEXT_NODE) { + if (cur->content != NULL) { + if (((cur->name == (const xmlChar *)xmlStringText) || + (cur->name != (const xmlChar *)xmlStringTextNoenc)) && + ((cur->parent == NULL) || + ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && + (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { + xmlChar *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + } + return; + } + if (cur->type == HTML_COMMENT_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "<!--"); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + xmlOutputBufferWriteString(buf, "-->"); + } + return; + } + if (cur->type == HTML_PI_NODE) 
{ + if (cur->name == NULL) + return; + xmlOutputBufferWriteString(buf, "<?"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + xmlOutputBufferWriteString(buf, ">"); + return; + } + if (cur->type == HTML_ENTITY_REF_NODE) { + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + return; + } + if (cur->type == HTML_PRESERVE_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + return; + } + + /* + * Get specific HTML info for that node. + */ + info = htmlTagLookup(cur->name); + + xmlOutputBufferWriteString(buf, "<"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->properties != NULL) + htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); + + if ((info != NULL) && (info->empty)) { + xmlOutputBufferWriteString(buf, ">"); + if ((format) && (!info->isinline) && (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (cur->parent != NULL) && + (!xmlStrEqual(cur->parent->name, BAD_CAST "pre"))) + xmlOutputBufferWriteString(buf, "\n"); + } + return; + } + if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && + (cur->children == NULL)) { + if ((info != NULL) && (info->saveEndTag != 0) && + (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && + (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { + xmlOutputBufferWriteString(buf, ">"); + } else { + xmlOutputBufferWriteString(buf, "></"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + } + if ((format) && (cur->next != NULL) && + (info != NULL) && (!info->isinline)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (cur->parent != NULL) && + 
(!xmlStrEqual(cur->parent->name, BAD_CAST "pre"))) + xmlOutputBufferWriteString(buf, "\n"); + } + return; + } + xmlOutputBufferWriteString(buf, ">"); + if ((cur->type != XML_ELEMENT_NODE) && + (cur->content != NULL)) { + /* + * Uses the OutputBuffer property to automatically convert + * invalids to charrefs + */ + + xmlOutputBufferWriteString(buf, (const char *) cur->content); + } + if (cur->children != NULL) { + if ((format) && (info != NULL) && (!info->isinline) && + (cur->children->type != HTML_TEXT_NODE) && + (cur->children->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last) && + (!xmlStrEqual(cur->name, BAD_CAST "pre"))) + xmlOutputBufferWriteString(buf, "\n"); + htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); + if ((format) && (info != NULL) && (!info->isinline) && + (cur->last->type != HTML_TEXT_NODE) && + (cur->last->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last) && + (!xmlStrEqual(cur->name, BAD_CAST "pre"))) + xmlOutputBufferWriteString(buf, "\n"); + } + xmlOutputBufferWriteString(buf, "</"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + if ((format) && (info != NULL) && (!info->isinline) && + (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (cur->parent != NULL) && + (!xmlStrEqual(cur->parent->name, BAD_CAST "pre"))) + xmlOutputBufferWriteString(buf, "\n"); + } +} + +/** + * htmlNodeDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * @encoding: the encoding string + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns/spaces are added. 
+ */ +void +htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding) { + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); +} + +/** + * htmlDocContentDumpFormatOutput: + * @buf: the HTML buffer output + * @cur: the document + * @encoding: the encoding string + * @format: should formatting spaces been added + * + * Dump an HTML document. + */ +void +htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding, int format) { + int type; + + /* + * force to output the stuff as HTML, especially for entities + */ + type = cur->type; + cur->type = XML_HTML_DOCUMENT_NODE; + if (cur->intSubset != NULL) { + htmlDtdDumpOutput(buf, cur, NULL); + } + if (cur->children != NULL) { + htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); + } + xmlOutputBufferWriteString(buf, "\n"); + cur->type = (xmlElementType) type; +} + +/** + * htmlDocContentDumpOutput: + * @buf: the HTML buffer output + * @cur: the document + * @encoding: the encoding string + * + * Dump an HTML document. Formating return/spaces are added. + */ +void +htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding) { + htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); +} + +/************************************************************************ + * * + * Saving functions front-ends * + * * + ************************************************************************/ + +/** + * htmlDocDump: + * @f: the FILE* + * @cur: the document + * + * Dump an HTML document to an open FILE. + * + * returns: the number of byte written or -1 in case of failure. 
+ */ +int +htmlDocDump(FILE *f, xmlDocPtr cur) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + const char *encoding; + int ret; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "htmlDocDump : document == NULL\n"); +#endif + return(-1); + } + + encoding = (const char *) htmlGetMetaEncoding(cur); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != cur->charset) { + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + /* + * Not supported yet + */ + return(-1); + } + + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + return(-1); + } + } + + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + + buf = xmlOutputBufferCreateFile(f, handler); + if (buf == NULL) return(-1); + htmlDocContentDumpOutput(buf, cur, NULL); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlSaveFile: + * @filename: the filename (or URL) + * @cur: the document + * + * Dump an HTML document to a file. If @filename is "-" the stdout file is + * used. + * returns: the number of byte written or -1 in case of failure. 
+ */ +int +htmlSaveFile(const char *filename, xmlDocPtr cur) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + const char *encoding; + int ret; + + encoding = (const char *) htmlGetMetaEncoding(cur); + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != cur->charset) { + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + /* + * Not supported yet + */ + return(-1); + } + + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + return(-1); + } + } + + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); + if (buf == NULL) return(0); + + htmlDocContentDumpOutput(buf, cur, NULL); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlSaveFileFormat: + * @filename: the filename + * @cur: the document + * @format: should formatting spaces been added + * @encoding: the document encoding + * + * Dump an HTML document to a file using a given encoding. + * + * returns: the number of byte written or -1 in case of failure. 
+ */ +int +htmlSaveFileFormat(const char *filename, xmlDocPtr cur, + const char *encoding, int format) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + int ret; + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != cur->charset) { + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + /* + * Not supported yet + */ + return(-1); + } + + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + return(-1); + htmlSetMetaEncoding(cur, (const xmlChar *) encoding); + } + } else { + htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); + } + + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFilename(filename, handler, 0); + if (buf == NULL) return(0); + + htmlDocContentDumpFormatOutput(buf, cur, encoding, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * htmlSaveFileEnc: + * @filename: the filename + * @cur: the document + * @encoding: the document encoding + * + * Dump an HTML document to a file using a given encoding + * and formatting returns/spaces are added. + * + * returns: the number of byte written or -1 in case of failure. + */ +int +htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { + return(htmlSaveFileFormat(filename, cur, encoding, 1)); +} + + + +#endif /* LIBXML_HTML_ENABLED */ diff --git a/bundle/libxml/SAX.c b/bundle/libxml/SAX.c new file mode 100644 index 0000000000..785ea0ba8c --- /dev/null +++ b/bundle/libxml/SAX.c @@ -0,0 +1,2015 @@ +/* + * SAX.c : Default SAX handler to build a tree. + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard <daniel@veillard.com> + */ + + +#define IN_LIBXML +#include "libxml.h" +#include <stdlib.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/valid.h> +#include <libxml/entities.h> +#include <libxml/xmlerror.h> +#include <libxml/debugXML.h> +#include <libxml/xmlIO.h> +#include <libxml/SAX.h> +#include <libxml/uri.h> +#include <libxml/valid.h> +#include <libxml/HTMLtree.h> +#include <libxml/globals.h> + +/* #define DEBUG_SAX */ +/* #define DEBUG_SAX_TREE */ + +/** + * getPublicId: + * @ctx: the user data (XML parser context) + * + * Provides the public ID e.g. "-//SGMLSOURCE//DTD DEMO//EN" + * + * Returns a xmlChar * + */ +const xmlChar * +getPublicId(void *ctx ATTRIBUTE_UNUSED) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ + return(NULL); +} + +/** + * getSystemId: + * @ctx: the user data (XML parser context) + * + * Provides the system ID, basically URL or filename e.g. + * http://www.sgmlsource.com/dtds/memo.dtd + * + * Returns a xmlChar * + */ +const xmlChar * +getSystemId(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return((const xmlChar *) ctxt->input->filename); +} + +/** + * getLineNumber: + * @ctx: the user data (XML parser context) + * + * Provide the line number of the current parsing point. + * + * Returns an int + */ +int +getLineNumber(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->input->line); +} + +/** + * getColumnNumber: + * @ctx: the user data (XML parser context) + * + * Provide the column number of the current parsing point. + * + * Returns an int + */ +int +getColumnNumber(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->input->col); +} + +/** + * isStandalone: + * @ctx: the user data (XML parser context) + * + * Is this document tagged standalone ? 
+ * + * Returns 1 if true + */ +int +isStandalone(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->myDoc->standalone == 1); +} + +/** + * hasInternalSubset: + * @ctx: the user data (XML parser context) + * + * Does this document has an internal subset + * + * Returns 1 if true + */ +int +hasInternalSubset(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->myDoc->intSubset != NULL); +} + +/** + * hasExternalSubset: + * @ctx: the user data (XML parser context) + * + * Does this document has an external subset + * + * Returns 1 if true + */ +int +hasExternalSubset(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->myDoc->extSubset != NULL); +} + +/** + * internalSubset: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on internal subset declaration. + */ +void +internalSubset(void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlDtdPtr dtd; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.internalSubset(%s, %s, %s)\n", + name, ExternalID, SystemID); +#endif + + if (ctxt->myDoc == NULL) + return; + dtd = xmlGetIntSubset(ctxt->myDoc); + if (dtd != NULL) { + if (ctxt->html) + return; + xmlUnlinkNode((xmlNodePtr) dtd); + xmlFreeDtd(dtd); + ctxt->myDoc->intSubset = NULL; + } + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID); +} + +/** + * externalSubset: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on external subset declaration. 
+ */ +void +externalSubset(void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.externalSubset(%s, %s, %s)\n", + name, ExternalID, SystemID); +#endif + if (((ExternalID != NULL) || (SystemID != NULL)) && + (((ctxt->validate) || (ctxt->loadsubset != 0)) && + (ctxt->wellFormed && ctxt->myDoc))) { + /* + * Try to fetch and parse the external subset. + */ + xmlParserInputPtr oldinput; + int oldinputNr; + int oldinputMax; + xmlParserInputPtr *oldinputTab; + xmlParserInputPtr input = NULL; + xmlCharEncoding enc; + int oldcharset; + + /* + * Ask the Entity resolver to load the damn thing + */ + if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) + input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, + SystemID); + if (input == NULL) { + return; + } + + xmlNewDtd(ctxt->myDoc, name, ExternalID, SystemID); + + /* + * make sure we won't destroy the main document context + */ + oldinput = ctxt->input; + oldinputNr = ctxt->inputNr; + oldinputMax = ctxt->inputMax; + oldinputTab = ctxt->inputTab; + oldcharset = ctxt->charset; + + ctxt->inputTab = (xmlParserInputPtr *) + xmlMalloc(5 * sizeof(xmlParserInputPtr)); + if (ctxt->inputTab == NULL) { + ctxt->errNo = XML_ERR_NO_MEMORY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "externalSubset: out of memory\n"); + ctxt->errNo = XML_ERR_NO_MEMORY; + ctxt->input = oldinput; + ctxt->inputNr = oldinputNr; + ctxt->inputMax = oldinputMax; + ctxt->inputTab = oldinputTab; + ctxt->charset = oldcharset; + return; + } + ctxt->inputNr = 0; + ctxt->inputMax = 5; + ctxt->input = NULL; + xmlPushInput(ctxt, input); + + /* + * On the fly encoding conversion if needed + */ + enc = xmlDetectCharEncoding(ctxt->input->cur, 4); + xmlSwitchEncoding(ctxt, enc); + + if (input->filename == NULL) + input->filename = (char *) xmlStrdup(SystemID); 
+ input->line = 1; + input->col = 1; + input->base = ctxt->input->cur; + input->cur = ctxt->input->cur; + input->free = NULL; + + /* + * let's parse that entity knowing it's an external subset. + */ + xmlParseExternalSubset(ctxt, ExternalID, SystemID); + + /* + * Free up the external entities + */ + + while (ctxt->inputNr > 1) + xmlPopInput(ctxt); + xmlFreeInputStream(ctxt->input); + xmlFree(ctxt->inputTab); + + /* + * Restore the parsing context of the main entity + */ + ctxt->input = oldinput; + ctxt->inputNr = oldinputNr; + ctxt->inputMax = oldinputMax; + ctxt->inputTab = oldinputTab; + ctxt->charset = oldcharset; + /* ctxt->wellFormed = oldwellFormed; */ + } +} + +/** + * resolveEntity: + * @ctx: the user data (XML parser context) + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * The entity loader, to control the loading of external entities, + * the application can either: + * - override this resolveEntity() callback in the SAX block + * - or better use the xmlSetExternalEntityLoader() function to + * set up it's own entity resolution routine + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. + */ +xmlParserInputPtr +resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserInputPtr ret; + xmlChar *URI; + const char *base = NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.resolveEntity(%s, %s)\n", publicId, systemId); +#endif + + ret = xmlLoadExternalEntity((const char *) URI, + (const char *) publicId, ctxt); + if (URI != NULL) + xmlFree(URI); + return(ret); +} + +/** + * getEntity: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get an entity by name + * + * Returns the xmlEntityPtr if found. 
+ */ +xmlEntityPtr +getEntity(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlEntityPtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.getEntity(%s)\n", name); +#endif + + if ((ctxt->myDoc != NULL) && (ctxt->myDoc->standalone == 1)) { + if (ctxt->inSubset == 2) { + ctxt->myDoc->standalone = 0; + ret = xmlGetDocEntity(ctxt->myDoc, name); + ctxt->myDoc->standalone = 1; + } else { + ret = xmlGetDocEntity(ctxt->myDoc, name); + if (ret == NULL) { + ctxt->myDoc->standalone = 0; + ret = xmlGetDocEntity(ctxt->myDoc, name); + if (ret != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Entity(%s) document marked standalone but require external subset\n", + name); + ctxt->valid = 0; + ctxt->wellFormed = 0; + } + ctxt->myDoc->standalone = 1; + } + } + } else { + ret = xmlGetDocEntity(ctxt->myDoc, name); + } + if ((ret != NULL) && (ctxt->validate) && (ret->children == NULL) && + (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + /* + * for validation purposes we really need to fetch and + * parse the external entity + */ + xmlNodePtr children; + + xmlParseCtxtExternalEntity(ctxt, ret->URI, ret->ExternalID, &children); + xmlAddChildList((xmlNodePtr) ret, children); + } + return(ret); +} + +/** + * getParameterEntity: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get a parameter entity by name + * + * Returns the xmlEntityPtr if found. 
+ */ +xmlEntityPtr +getParameterEntity(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlEntityPtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.getParameterEntity(%s)\n", name); +#endif + + ret = xmlGetParameterEntity(ctxt->myDoc, name); + return(ret); +} + + +/** + * entityDecl: + * @ctx: the user data (XML parser context) + * @name: the entity name + * @type: the entity type + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @content: the entity value (without processing). + * + * An entity definition has been parsed + */ +void +entityDecl(void *ctx, const xmlChar *name, int type, + const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) +{ + xmlEntityPtr ent; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.entityDecl(%s, %d, %s, %s, %s)\n", + name, type, publicId, systemId, content); +#endif + if (ctxt->inSubset == 1) { + ent = xmlAddDocEntity(ctxt->myDoc, name, type, publicId, + systemId, content); + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the internal subset\n", name); + if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { + xmlChar *URI; + const char *base = NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + ent->URI = URI; + } + } else if (ctxt->inSubset == 2) { + ent = xmlAddDtdEntity(ctxt->myDoc, name, type, publicId, + systemId, content); + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the external subset\n", name); + if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { + xmlChar *URI; + const char *base 
= NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + ent->URI = URI; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.entityDecl(%s) called while not in subset\n", name); + } +} + +/** + * attributeDecl: + * @ctx: the user data (XML parser context) + * @elem: the name of the element + * @fullname: the attribute name + * @type: the attribute type + * @def: the type of default value + * @defaultValue: the attribute default value + * @tree: the tree of enumerated value set + * + * An attribute definition has been parsed + */ +void +attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname, + int type, int def, const xmlChar *defaultValue, + xmlEnumerationPtr tree) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlAttributePtr attr; + xmlChar *name = NULL, *prefix = NULL; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", + elem, fullname, type, def, defaultValue); +#endif + name = xmlSplitQName(ctxt, fullname, &prefix); + ctxt->vctxt.valid = 1; + if (ctxt->inSubset == 1) + attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem, + name, prefix, (xmlAttributeType) type, + (xmlAttributeDefault) def, defaultValue, tree); + else if (ctxt->inSubset == 2) + attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem, + name, prefix, (xmlAttributeType) type, + (xmlAttributeDefault) def, defaultValue, tree); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.attributeDecl(%s) called while not in subset\n", name); + return; + } + if (ctxt->vctxt.valid == 0) + ctxt->valid = 0; + if ((attr != NULL) && (ctxt->validate) && (ctxt->wellFormed) && + (ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset != NULL)) + ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, 
ctxt->myDoc, + attr); + if (prefix != NULL) + xmlFree(prefix); + if (name != NULL) + xmlFree(name); +} + +/** + * elementDecl: + * @ctx: the user data (XML parser context) + * @name: the element name + * @type: the element type + * @content: the element value tree + * + * An element definition has been parsed + */ +void +elementDecl(void *ctx, const xmlChar * name, int type, + xmlElementContentPtr content) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlElementPtr elem = NULL; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.elementDecl(%s, %d, ...)\n", name, type); +#endif + + if (ctxt->inSubset == 1) + elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, + name, (xmlElementTypeVal) type, content); + else if (ctxt->inSubset == 2) + elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, + name, (xmlElementTypeVal) type, content); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.elementDecl(%s) called while not in subset\n", + name); + return; + } + if (elem == NULL) + ctxt->valid = 0; + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= + xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem); +} + +/** + * notationDecl: + * @ctx: the user data (XML parser context) + * @name: The name of the notation + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * What to do when a notation declaration has been parsed. 
+ */ +void +notationDecl(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNotationPtr nota = NULL; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId); +#endif + + if ((publicId == NULL) && (systemId == NULL)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.notationDecl(%s) externalID or PublicID missing\n", name); + ctxt->valid = 0; + ctxt->wellFormed = 0; + return; + } else if (ctxt->inSubset == 1) + nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name, + publicId, systemId); + else if (ctxt->inSubset == 2) + nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, name, + publicId, systemId); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.notationDecl(%s) called while not in subset\n", name); + return; + } + if (nota == NULL) ctxt->valid = 0; + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc, + nota); +} + +/** + * unparsedEntityDecl: + * @ctx: the user data (XML parser context) + * @name: The name of the entity + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @notationName: the name of the notation + * + * What to do when an unparsed entity declaration is parsed + */ +void +unparsedEntityDecl(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId, + const xmlChar *notationName) +{ + xmlEntityPtr ent; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", + name, publicId, systemId, notationName); +#endif +#if 0 + Done in xmlValidateDtdFinal now. 
+ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc) { + int ret; + ret = xmlValidateNotationUse(&ctxt->vctxt, ctxt->myDoc, + notationName); + if (ret == 0) { + ctxt->wellFormed = 0; + ctxt->valid = 0; + } + } +#endif + if (ctxt->inSubset == 1) { + ent = xmlAddDocEntity(ctxt->myDoc, name, + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY, + publicId, systemId, notationName); + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the internal subset\n", name); + if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { + xmlChar *URI; + const char *base = NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + ent->URI = URI; + } + } else if (ctxt->inSubset == 2) { + ent = xmlAddDtdEntity(ctxt->myDoc, name, + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY, + publicId, systemId, notationName); + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the external subset\n", name); + if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { + xmlChar *URI; + const char *base = NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + ent->URI = URI; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.unparsedEntityDecl(%s) called while not in subset\n", name); + } +} + +/** + * setDocumentLocator: + * @ctx: the user data (XML parser context) + * @loc: A SAX Locator + * + * Receive the document locator at startup, actually xmlDefaultSAXLocator + * Everything is available on the context, so this is useless in our case. 
+ */ +void +setDocumentLocator(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.setDocumentLocator()\n"); +#endif +} + +/** + * startDocument: + * @ctx: the user data (XML parser context) + * + * called when the document start being processed. + */ +void +startDocument(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlDocPtr doc; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.startDocument()\n"); +#endif + if (ctxt->html) { + if (ctxt->myDoc == NULL) +#ifdef LIBXML_HTML_ENABLED + ctxt->myDoc = htmlNewDocNoDtD(NULL, NULL); +#else + xmlGenericError(xmlGenericErrorContext, + "libxml2 built without HTML support\n"); +#endif + } else { + doc = ctxt->myDoc = xmlNewDoc(ctxt->version); + if (doc != NULL) { + if (ctxt->encoding != NULL) + doc->encoding = xmlStrdup(ctxt->encoding); + else + doc->encoding = NULL; + doc->standalone = ctxt->standalone; + } + } + if ((ctxt->myDoc != NULL) && (ctxt->myDoc->URL == NULL) && + (ctxt->input != NULL) && (ctxt->input->filename != NULL)) { + ctxt->myDoc->URL = xmlStrdup((const xmlChar *) ctxt->input->filename); + } +} + +/** + * endDocument: + * @ctx: the user data (XML parser context) + * + * called when the document end has been detected. 
+ */ +void +endDocument(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.endDocument()\n"); +#endif + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc); + + /* + * Grab the encoding if it was added on-the-fly + */ + if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) && + (ctxt->myDoc->encoding == NULL)) { + ctxt->myDoc->encoding = ctxt->encoding; + ctxt->encoding = NULL; + } + if ((ctxt->inputTab[0]->encoding != NULL) && (ctxt->myDoc != NULL) && + (ctxt->myDoc->encoding == NULL)) { + ctxt->myDoc->encoding = xmlStrdup(ctxt->inputTab[0]->encoding); + } + if ((ctxt->charset != XML_CHAR_ENCODING_NONE) && (ctxt->myDoc != NULL) && + (ctxt->myDoc->charset == XML_CHAR_ENCODING_NONE)) { + ctxt->myDoc->charset = ctxt->charset; + } +} + +/** + * my_attribute: + * @ctx: the user data (XML parser context) + * @fullname: The attribute name, including namespace prefix + * @value: The attribute value + * @prefix: the prefix on the element node + * + * Handle an attribute that has been read by the parser. + * The default handling is to convert the attribute into an + * DOM subtree and past it in a new xmlAttr element added to + * the element. 
+ */ +static void +my_attribute(void *ctx, const xmlChar *fullname, const xmlChar *value, + const xmlChar *prefix) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlAttrPtr ret; + xmlChar *name; + xmlChar *ns; + xmlChar *nval; + xmlNsPtr namespace; + +/**************** +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.attribute(%s, %s)\n", fullname, value); +#endif + ****************/ + /* + * Split the full name into a namespace prefix and the tag name + */ + name = xmlSplitQName(ctxt, fullname, &ns); + + /* + * Do the last stage of the attribute normalization + * Needed for HTML too: + * http://www.w3.org/TR/html4/types.html#h-6.2 + */ + ctxt->vctxt.valid = 1; + nval = xmlValidCtxtNormalizeAttributeValue(&ctxt->vctxt, + ctxt->myDoc, ctxt->node, + fullname, value); + if (ctxt->vctxt.valid != 1) { + ctxt->valid = 0; + } + if (nval != NULL) + value = nval; + + /* + * Check whether it's a namespace definition + */ + if ((!ctxt->html) && (ns == NULL) && + (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') && + (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) { + xmlNsPtr nsret; + + if (value[0] != 0) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *)value); + if (uri == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "nmlns: %s not a valid URI\n", value); + } else { + if (uri->scheme == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "nmlns: URI %s is not absolute\n", value); + } + xmlFreeURI(uri); + } + } + + /* a default namespace definition */ + nsret = xmlNewNs(ctxt->node, value, NULL); + + /* + * Validate also for namespace decls, they are attributes from + * an XML-1.0 perspective + */ + if (nsret != NULL && ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateOneNamespace(&ctxt->vctxt, ctxt->myDoc, + ctxt->node, prefix, nsret, value); + if (name != 
NULL) + xmlFree(name); + if (nval != NULL) + xmlFree(nval); + return; + } + if ((!ctxt->html) && + (ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') && + (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) { + xmlNsPtr nsret; + + if (value[0] == 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Empty namespace name for prefix %s\n", name); + } + /* a standard namespace definition */ + nsret = xmlNewNs(ctxt->node, value, name); + xmlFree(ns); + /* + * Validate also for namespace decls, they are attributes from + * an XML-1.0 perspective + */ + if (nsret != NULL && ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateOneNamespace(&ctxt->vctxt, ctxt->myDoc, + ctxt->node, prefix, nsret, value); + if (name != NULL) + xmlFree(name); + if (nval != NULL) + xmlFree(nval); + return; + } + + if (ns != NULL) + namespace = xmlSearchNs(ctxt->myDoc, ctxt->node, ns); + else { + namespace = NULL; + } + + /* !!!!!! <a toto:arg="" xmlns:toto="http://toto.com"> */ + ret = xmlNewNsPropEatName(ctxt->node, namespace, name, NULL); + + if (ret != NULL) { + if ((ctxt->replaceEntities == 0) && (!ctxt->html)) { + xmlNodePtr tmp; + + ret->children = xmlStringGetNodeList(ctxt->myDoc, value); + tmp = ret->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) ret; + if (tmp->next == NULL) + ret->last = tmp; + tmp = tmp->next; + } + } else if (value != NULL) { + ret->children = xmlNewDocText(ctxt->myDoc, value); + ret->last = ret->children; + if (ret->children != NULL) + ret->children->parent = (xmlNodePtr) ret; + } + } + + if ((!ctxt->html) && ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) { + + /* + * If we don't substitute entities, the validation should be + * done on a value with replaced entities anyway. 
+ */ + if (!ctxt->replaceEntities) { + xmlChar *val; + + ctxt->depth++; + val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF, + 0,0,0); + ctxt->depth--; + + if (val == NULL) + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, + ctxt->myDoc, ctxt->node, ret, value); + else { + xmlChar *nvalnorm; + + /* + * Do the last stage of the attribute normalization + * It need to be done twice ... it's an extra burden related + * to the ability to keep references in attributes + */ + nvalnorm = xmlValidNormalizeAttributeValue(ctxt->myDoc, + ctxt->node, fullname, val); + if (nvalnorm != NULL) { + xmlFree(val); + val = nvalnorm; + } + + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, + ctxt->myDoc, ctxt->node, ret, val); + xmlFree(val); + } + } else { + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc, + ctxt->node, ret, value); + } + } else if (((ctxt->replaceEntities == 0) && (ctxt->external != 2)) || + ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) { + /* + * when validating, the ID registration is done at the attribute + * validation level. Otherwise we have to do specific handling here. + */ + if (xmlIsID(ctxt->myDoc, ctxt->node, ret)) + xmlAddID(&ctxt->vctxt, ctxt->myDoc, value, ret); + else if (xmlIsRef(ctxt->myDoc, ctxt->node, ret)) + xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret); + } + + if (nval != NULL) + xmlFree(nval); + if (ns != NULL) + xmlFree(ns); +} + +/** + * attribute: + * @ctx: the user data (XML parser context) + * @fullname: The attribute name, including namespace prefix + * @value: The attribute value + * + * Handle an attribute that has been read by the parser. + * The default handling is to convert the attribute into an + * DOM subtree and past it in a new xmlAttr element added to + * the element. 
+ */ +void +attribute(void *ctx, const xmlChar *fullname, const xmlChar *value) +{ + my_attribute(ctx, fullname, value, NULL); +} + +/* + * xmlCheckDefaultedAttributes: + * + * Check defaulted attributes from the DTD + */ +static void +xmlCheckDefaultedAttributes(xmlParserCtxtPtr ctxt, const xmlChar *name, + const xmlChar *prefix, const xmlChar **atts) { + xmlElementPtr elemDecl; + const xmlChar *att; + int internal = 1; + int i; + + elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->intSubset, name, prefix); + if (elemDecl == NULL) { + elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset, name, prefix); + internal = 0; + } + +process_external_subset: + + if (elemDecl != NULL) { + xmlAttributePtr attr = elemDecl->attributes; + /* + * Check against defaulted attributes from the external subset + * if the document is stamped as standalone + */ + if ((ctxt->myDoc->standalone == 1) && + (ctxt->myDoc->extSubset != NULL) && + (ctxt->validate)) { + while (attr != NULL) { + if ((attr->defaultValue != NULL) && + (xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset, + attr->elem, attr->name, + attr->prefix) == attr) && + (xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset, + attr->elem, attr->name, + attr->prefix) == NULL)) { + xmlChar *fulln; + + if (attr->prefix != NULL) { + fulln = xmlStrdup(attr->prefix); + fulln = xmlStrcat(fulln, BAD_CAST ":"); + fulln = xmlStrcat(fulln, attr->name); + } else { + fulln = xmlStrdup(attr->name); + } + + /* + * Check that the attribute is not declared in the + * serialization + */ + att = NULL; + if (atts != NULL) { + i = 0; + att = atts[i]; + while (att != NULL) { + if (xmlStrEqual(att, fulln)) + break; + i += 2; + att = atts[i]; + } + } + if (att == NULL) { + if (ctxt->vctxt.error != NULL) + ctxt->vctxt.error(ctxt->vctxt.userData, + "standalone: attribute %s on %s defaulted from external subset\n", + fulln, attr->elem); + ctxt->valid = 0; + } + } + attr = attr->nexth; + } + } + + /* + * Actually insert defaulted values when needed + */ + attr = 
elemDecl->attributes; + while (attr != NULL) { + /* + * Make sure that attributes redefinition occuring in the + * internal subset are not overriden by definitions in the + * external subset. + */ + if (attr->defaultValue != NULL) { + /* + * the element should be instantiated in the tree if: + * - this is a namespace prefix + * - the user required for completion in the tree + * like XSLT + * - there isn't already an attribute definition + * in the internal subset overriding it. + */ + if (((attr->prefix != NULL) && + (xmlStrEqual(attr->prefix, BAD_CAST "xmlns"))) || + ((attr->prefix == NULL) && + (xmlStrEqual(attr->name, BAD_CAST "xmlns"))) || + (ctxt->loadsubset & XML_COMPLETE_ATTRS)) { + xmlAttributePtr tst; + + tst = xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset, + attr->elem, attr->name, + attr->prefix); + if ((tst == attr) || (tst == NULL)) { + xmlChar *fulln; + + if (attr->prefix != NULL) { + fulln = xmlStrdup(attr->prefix); + fulln = xmlStrcat(fulln, BAD_CAST ":"); + fulln = xmlStrcat(fulln, attr->name); + } else { + fulln = xmlStrdup(attr->name); + } + + /* + * Check that the attribute is not declared in the + * serialization + */ + att = NULL; + if (atts != NULL) { + i = 0; + att = atts[i]; + while (att != NULL) { + if (xmlStrEqual(att, fulln)) + break; + i += 2; + att = atts[i]; + } + } + if (att == NULL) { + attribute(ctxt, fulln, attr->defaultValue); + } + xmlFree(fulln); + } + } + } + attr = attr->nexth; + } + if (internal == 1) { + elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset, + name, prefix); + internal = 0; + goto process_external_subset; + } + } +} + +/** + * startElement: + * @ctx: the user data (XML parser context) + * @fullname: The element name, including namespace prefix + * @atts: An array of name/value attributes pairs, NULL terminated + * + * called when an opening tag has been processed. 
+ */ +void +startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + xmlNodePtr parent = ctxt->node; + xmlNsPtr ns; + xmlChar *name; + xmlChar *prefix; + const xmlChar *att; + const xmlChar *value; + int i; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.startElement(%s)\n", fullname); +#endif + + /* + * First check on validity: + */ + if (ctxt->validate && (ctxt->myDoc->extSubset == NULL) && + ((ctxt->myDoc->intSubset == NULL) || + ((ctxt->myDoc->intSubset->notations == NULL) && + (ctxt->myDoc->intSubset->elements == NULL) && + (ctxt->myDoc->intSubset->attributes == NULL) && + (ctxt->myDoc->intSubset->entities == NULL)))) { + if (ctxt->vctxt.error != NULL) { + ctxt->vctxt.error(ctxt->vctxt.userData, + "Validation failed: no DTD found !\n"); + } + ctxt->validate = 0; + ctxt->valid = 0; + ctxt->errNo = XML_ERR_NO_DTD; + } + + + /* + * Split the full name into a namespace prefix and the tag name + */ + name = xmlSplitQName(ctxt, fullname, &prefix); + + + /* + * Note : the namespace resolution is deferred until the end of the + * attributes parsing, since local namespace can be defined as + * an attribute at this level. + */ + ret = xmlNewDocNodeEatName(ctxt->myDoc, NULL, name, NULL); + if (ret == NULL) return; + if (ctxt->myDoc->children == NULL) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, "Setting %s as root\n", name); +#endif + xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret); + } else if (parent == NULL) { + parent = ctxt->myDoc->children; + } + ctxt->nodemem = -1; + if (ctxt->linenumbers) { + if (ctxt->input != NULL) + ret->content = (void *) (long) ctxt->input->line; + } + + /* + * We are parsing a new node. 
+ */ +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, "pushing(%s)\n", name); +#endif + nodePush(ctxt, ret); + + /* + * Link the child element + */ + if (parent != NULL) { + if (parent->type == XML_ELEMENT_NODE) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding child %s to %s\n", name, parent->name); +#endif + xmlAddChild(parent, ret); + } else { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding sibling %s to ", name); + xmlDebugDumpOneNode(stderr, parent, 0); +#endif + xmlAddSibling(parent, ret); + } + } + + /* + * Insert all the defaulted attributes from the DTD especially namespaces + */ + if ((!ctxt->html) && + ((ctxt->myDoc->intSubset != NULL) || + (ctxt->myDoc->extSubset != NULL))) { + xmlCheckDefaultedAttributes(ctxt, name, prefix, atts); + } + + /* + * process all the attributes whose name start with "xmlns" + */ + if (atts != NULL) { + i = 0; + att = atts[i++]; + value = atts[i++]; + if (!ctxt->html) { + while ((att != NULL) && (value != NULL)) { + if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l') && + (att[3] == 'n') && (att[4] == 's')) + my_attribute(ctxt, att, value, prefix); + + att = atts[i++]; + value = atts[i++]; + } + } + } + + /* + * Search the namespace, note that since the attributes have been + * processed, the local namespaces are available. 
+ */ + ns = xmlSearchNs(ctxt->myDoc, ret, prefix); + if ((ns == NULL) && (parent != NULL)) + ns = xmlSearchNs(ctxt->myDoc, parent, prefix); + if ((prefix != NULL) && (ns == NULL)) { + ns = xmlNewNs(ret, NULL, prefix); + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Namespace prefix %s is not defined\n", prefix); + } + + /* + * set the namespace node, making sure that if the default namspace + * is unbound on a parent we simply kee it NULL + */ + if ((ns != NULL) && (ns->href != NULL) && + ((ns->href[0] != 0) || (ns->prefix != NULL))) + xmlSetNs(ret, ns); + + /* + * process all the other attributes + */ + if (atts != NULL) { + i = 0; + att = atts[i++]; + value = atts[i++]; + if (ctxt->html) { + while (att != NULL) { + attribute(ctxt, att, value); + att = atts[i++]; + value = atts[i++]; + } + } else { + while ((att != NULL) && (value != NULL)) { + if ((att[0] != 'x') || (att[1] != 'm') || (att[2] != 'l') || + (att[3] != 'n') || (att[4] != 's')) + attribute(ctxt, att, value); + + /* + * Next ones + */ + att = atts[i++]; + value = atts[i++]; + } + } + } + + /* + * If it's the Document root, finish the DTD validation and + * check the document root element for validity + */ + if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) { + int chk; + + chk = xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc); + if (chk <= 0) + ctxt->valid = 0; + if (chk < 0) + ctxt->wellFormed = 0; + ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); + ctxt->vctxt.finishDtd = 1; + } + + if (prefix != NULL) + xmlFree(prefix); + +} + +/** + * endElement: + * @ctx: the user data (XML parser context) + * @name: The element name + * + * called when the end of an element has been detected. 
+ */ +void +endElement(void *ctx, const xmlChar *name ATTRIBUTE_UNUSED) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserNodeInfo node_info; + xmlNodePtr cur = ctxt->node; + +#ifdef DEBUG_SAX + if (name == NULL) + xmlGenericError(xmlGenericErrorContext, "SAX.endElement(NULL)\n"); + else + xmlGenericError(xmlGenericErrorContext, "SAX.endElement(%s)\n", name); +#endif + + /* Capture end position and add node */ + if (cur != NULL && ctxt->record_info) { + node_info.end_pos = ctxt->input->cur - ctxt->input->base; + node_info.end_line = ctxt->input->line; + node_info.node = cur; + xmlParserAddNodeInfo(ctxt, &node_info); + } + ctxt->nodemem = -1; + + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateOneElement(&ctxt->vctxt, ctxt->myDoc, + cur); + + + /* + * end of parsing of this node. + */ +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, "popping(%s)\n", cur->name); +#endif + nodePop(ctxt); +} + +/** + * reference: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * called when an entity reference is detected. + */ +void +reference(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.reference(%s)\n", name); +#endif + if (name[0] == '#') + ret = xmlNewCharRef(ctxt->myDoc, name); + else + ret = xmlNewReference(ctxt->myDoc, name); +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "add reference %s to %s \n", name, ctxt->node->name); +#endif + xmlAddChild(ctxt->node, ret); +} + +/** + * characters: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some chars from the parser. 
+ */ +void +characters(void *ctx, const xmlChar *ch, int len) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr lastChild; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.characters(%.30s, %d)\n", ch, len); +#endif + /* + * Handle the data if any. If there is no child + * add it as content, otherwise if the last child is text, + * concatenate it, else create a new node of type text. + */ + + if (ctxt->node == NULL) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "add chars: ctxt->node == NULL !\n"); +#endif + return; + } + lastChild = xmlGetLastChild(ctxt->node); +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "add chars to %s \n", ctxt->node->name); +#endif + + /* + * Here we needed an accelerator mechanism in case of very large + * elements. Use an attribute in the structure !!! + */ + if (lastChild == NULL) { + /* first node, first time */ + xmlNodeAddContentLen(ctxt->node, ch, len); + if (ctxt->node->children != NULL) { + ctxt->nodelen = len; + ctxt->nodemem = len + 1; + } + } else { + int coalesceText = (lastChild != NULL) && + (lastChild->type == XML_TEXT_NODE) && + (lastChild->name == xmlStringText); + if ((coalesceText) && (ctxt->nodemem != 0)) { + /* + * The whole point of maintaining nodelen and nodemem, + * xmlTextConcat is too costly, i.e. compute length, + * reallocate a new buffer, move data, append ch. Here + * We try to minimaze realloc() uses and avoid copying + * and recomputing length over and over. 
+ */ + if (ctxt->nodelen + len >= ctxt->nodemem) { + xmlChar *newbuf; + int size; + + size = ctxt->nodemem + len; + size *= 2; + newbuf = (xmlChar *) xmlRealloc(lastChild->content,size); + if (newbuf == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "SAX.characters(): out of memory\n"); + return; + } + ctxt->nodemem = size; + lastChild->content = newbuf; + } + memcpy(&lastChild->content[ctxt->nodelen], ch, len); + ctxt->nodelen += len; + lastChild->content[ctxt->nodelen] = 0; + } else if (coalesceText) { + xmlTextConcat(lastChild, ch, len); + if (ctxt->node->children != NULL) { + ctxt->nodelen = xmlStrlen(lastChild->content); + ctxt->nodemem = ctxt->nodelen + 1; + } + } else { + /* Mixed content, first time */ + lastChild = xmlNewTextLen(ch, len); + xmlAddChild(ctxt->node, lastChild); + if (ctxt->node->children != NULL) { + ctxt->nodelen = len; + ctxt->nodemem = len + 1; + } + } + } +} + +/** + * ignorableWhitespace: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some ignorable whitespaces from the parser. + * UNUSED: by default the DOM building will use characters + */ +void +ignorableWhitespace(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.ignorableWhitespace(%.30s, %d)\n", ch, len); +#endif +} + +/** + * processingInstruction: + * @ctx: the user data (XML parser context) + * @target: the target name + * @data: the PI data's + * + * A processing instruction has been parsed. 
+ */ +void +processingInstruction(void *ctx, const xmlChar *target, + const xmlChar *data) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + xmlNodePtr parent = ctxt->node; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.processingInstruction(%s, %s)\n", target, data); +#endif + + ret = xmlNewPI(target, data); + if (ret == NULL) return; + parent = ctxt->node; + + if (ctxt->inSubset == 1) { + xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret); + return; + } else if (ctxt->inSubset == 2) { + xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret); + return; + } + if ((ctxt->myDoc->children == NULL) || (parent == NULL)) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "Setting PI %s as root\n", target); +#endif + xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret); + return; + } + if (parent->type == XML_ELEMENT_NODE) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding PI %s child to %s\n", target, parent->name); +#endif + xmlAddChild(parent, ret); + } else { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding PI %s sibling to ", target); + xmlDebugDumpOneNode(stderr, parent, 0); +#endif + xmlAddSibling(parent, ret); + } +} + +/** + * globalNamespace: + * @ctx: the user data (XML parser context) + * @href: the namespace associated URN + * @prefix: the namespace prefix + * + * An old global namespace has been parsed. + */ +void +globalNamespace(void *ctx, const xmlChar *href, const xmlChar *prefix) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.globalNamespace(%s, %s)\n", href, prefix); +#endif + xmlNewGlobalNs(ctxt->myDoc, href, prefix); +} + +/** + * setNamespace: + * @ctx: the user data (XML parser context) + * @name: the namespace prefix + * + * Set the current element namespace. 
+ */ + +void +setNamespace(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNsPtr ns; + xmlNodePtr parent; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, "SAX.setNamespace(%s)\n", name); +#endif + ns = xmlSearchNs(ctxt->myDoc, ctxt->node, name); + if (ns == NULL) { /* ctxt->node may not have a parent yet ! */ + if (ctxt->nodeNr >= 2) { + parent = ctxt->nodeTab[ctxt->nodeNr - 2]; + if (parent != NULL) + ns = xmlSearchNs(ctxt->myDoc, parent, name); + } + } + xmlSetNs(ctxt->node, ns); +} + +/** + * getNamespace: + * @ctx: the user data (XML parser context) + * + * Get the current element namespace. + * + * Returns the xmlNsPtr or NULL if none + */ + +xmlNsPtr +getNamespace(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNsPtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, "SAX.getNamespace()\n"); +#endif + ret = ctxt->node->ns; + return(ret); +} + +/** + * checkNamespace: + * @ctx: the user data (XML parser context) + * @namespace: the namespace to check against + * + * Check that the current element namespace is the same as the + * one read upon parsing. + * + * Returns 1 if true 0 otherwise + */ + +int +checkNamespace(void *ctx, xmlChar *namespace) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr cur = ctxt->node; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.checkNamespace(%s)\n", namespace); +#endif + + /* + * Check that the Name in the ETag is the same as in the STag. 
+ */ + if (namespace == NULL) { + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "End tags for %s don't hold the namespace %s\n", + cur->name, cur->ns->prefix); + ctxt->wellFormed = 0; + } + } else { + if ((cur->ns == NULL) || (cur->ns->prefix == NULL)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "End tags %s holds a prefix %s not used by the open tag\n", + cur->name, namespace); + ctxt->wellFormed = 0; + } else if (!xmlStrEqual(namespace, cur->ns->prefix)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Start and End tags for %s don't use the same namespaces: %s and %s\n", + cur->name, cur->ns->prefix, namespace); + ctxt->wellFormed = 0; + } else + return(1); + } + return(0); +} + +/** + * namespaceDecl: + * @ctx: the user data (XML parser context) + * @href: the namespace associated URN + * @prefix: the namespace prefix + * + * A namespace has been parsed. + */ +void +namespaceDecl(void *ctx, const xmlChar *href, const xmlChar *prefix) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + if (prefix == NULL) + xmlGenericError(xmlGenericErrorContext, + "SAX.namespaceDecl(%s, NULL)\n", href); + else + xmlGenericError(xmlGenericErrorContext, + "SAX.namespaceDecl(%s, %s)\n", href, prefix); +#endif + xmlNewNs(ctxt->node, href, prefix); +} + +/** + * comment: + * @ctx: the user data (XML parser context) + * @value: the comment content + * + * A comment has been parsed. 
+ */ +void +comment(void *ctx, const xmlChar *value) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + xmlNodePtr parent = ctxt->node; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, "SAX.comment(%s)\n", value); +#endif + ret = xmlNewDocComment(ctxt->myDoc, value); + if (ret == NULL) return; + + if (ctxt->inSubset == 1) { + xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret); + return; + } else if (ctxt->inSubset == 2) { + xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret); + return; + } + if ((ctxt->myDoc->children == NULL) || (parent == NULL)) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "Setting comment as root\n"); +#endif + xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret); + return; + } + if (parent->type == XML_ELEMENT_NODE) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding comment child to %s\n", parent->name); +#endif + xmlAddChild(parent, ret); + } else { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding comment sibling to "); + xmlDebugDumpOneNode(stderr, parent, 0); +#endif + xmlAddSibling(parent, ret); + } +} + +/** + * cdataBlock: + * @ctx: the user data (XML parser context) + * @value: The pcdata content + * @len: the block length + * + * called when a pcdata block has been parsed + */ +void +cdataBlock(void *ctx, const xmlChar *value, int len) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret, lastChild; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.pcdata(%.10s, %d)\n", value, len); +#endif + lastChild = xmlGetLastChild(ctxt->node); +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "add chars to %s \n", ctxt->node->name); +#endif + if ((lastChild != NULL) && + (lastChild->type == XML_CDATA_SECTION_NODE)) { + xmlTextConcat(lastChild, value, len); + } else { + ret = xmlNewCDataBlock(ctxt->myDoc, value, len); + xmlAddChild(ctxt->node, ret); + } +} + +/** + * 
initxmlDefaultSAXHandler: + * @hdlr: the SAX handler + * @warning: flag if non-zero sets the handler warning procedure + * + * Initialize the default XML SAX handler + */ +void +initxmlDefaultSAXHandler(xmlSAXHandler *hdlr, int warning) +{ + if(hdlr->initialized == 1) + return; + + hdlr->internalSubset = internalSubset; + hdlr->externalSubset = externalSubset; + hdlr->isStandalone = isStandalone; + hdlr->hasInternalSubset = hasInternalSubset; + hdlr->hasExternalSubset = hasExternalSubset; + hdlr->resolveEntity = resolveEntity; + hdlr->getEntity = getEntity; + hdlr->getParameterEntity = getParameterEntity; + hdlr->entityDecl = entityDecl; + hdlr->attributeDecl = attributeDecl; + hdlr->elementDecl = elementDecl; + hdlr->notationDecl = notationDecl; + hdlr->unparsedEntityDecl = unparsedEntityDecl; + hdlr->setDocumentLocator = setDocumentLocator; + hdlr->startDocument = startDocument; + hdlr->endDocument = endDocument; + hdlr->startElement = startElement; + hdlr->endElement = endElement; + hdlr->reference = reference; + hdlr->characters = characters; + hdlr->cdataBlock = cdataBlock; + hdlr->ignorableWhitespace = characters; + hdlr->processingInstruction = processingInstruction; + hdlr->comment = comment; + /* if (xmlGetWarningsDefaultValue == 0) */ + if (warning == 0) + hdlr->warning = NULL; + else + hdlr->warning = xmlParserWarning; + hdlr->error = xmlParserError; + hdlr->fatalError = xmlParserError; + + hdlr->initialized = 1; +} + +/** + * xmlDefaultSAXHandlerInit: + * + * Initialize the default SAX handler + */ +void +xmlDefaultSAXHandlerInit(void) +{ + initxmlDefaultSAXHandler(&xmlDefaultSAXHandler, xmlGetWarningsDefaultValue); +} + +#ifdef LIBXML_HTML_ENABLED + +/** + * inithtmlDefaultSAXHandler: + * @hdlr: the SAX handler + * + * Initialize the default HTML SAX handler + */ +void +inithtmlDefaultSAXHandler(xmlSAXHandler *hdlr) +{ + if(hdlr->initialized == 1) + return; + + hdlr->internalSubset = internalSubset; + hdlr->externalSubset = NULL; + hdlr->isStandalone = 
NULL; + hdlr->hasInternalSubset = NULL; + hdlr->hasExternalSubset = NULL; + hdlr->resolveEntity = NULL; + hdlr->getEntity = getEntity; + hdlr->getParameterEntity = NULL; + hdlr->entityDecl = NULL; + hdlr->attributeDecl = NULL; + hdlr->elementDecl = NULL; + hdlr->notationDecl = NULL; + hdlr->unparsedEntityDecl = NULL; + hdlr->setDocumentLocator = setDocumentLocator; + hdlr->startDocument = startDocument; + hdlr->endDocument = endDocument; + hdlr->startElement = startElement; + hdlr->endElement = endElement; + hdlr->reference = NULL; + hdlr->characters = characters; + hdlr->cdataBlock = cdataBlock; + hdlr->ignorableWhitespace = ignorableWhitespace; + hdlr->processingInstruction = NULL; + hdlr->comment = comment; + hdlr->warning = xmlParserWarning; + hdlr->error = xmlParserError; + hdlr->fatalError = xmlParserError; + + hdlr->initialized = 1; +} + +/** + * htmlDefaultSAXHandlerInit: + * + * Initialize the default SAX handler + */ +void +htmlDefaultSAXHandlerInit(void) +{ + inithtmlDefaultSAXHandler(&htmlDefaultSAXHandler); +} + +#endif /* LIBXML_HTML_ENABLED */ + +#ifdef LIBXML_DOCB_ENABLED + +/** + * initdocbDefaultSAXHandler: + * @hdlr: the SAX handler + * + * Initialize the default DocBook SAX handler + */ +void +initdocbDefaultSAXHandler(xmlSAXHandler *hdlr) +{ + if(hdlr->initialized == 1) + return; + + hdlr->internalSubset = internalSubset; + hdlr->externalSubset = NULL; + hdlr->isStandalone = isStandalone; + hdlr->hasInternalSubset = hasInternalSubset; + hdlr->hasExternalSubset = hasExternalSubset; + hdlr->resolveEntity = resolveEntity; + hdlr->getEntity = getEntity; + hdlr->getParameterEntity = NULL; + hdlr->entityDecl = entityDecl; + hdlr->attributeDecl = NULL; + hdlr->elementDecl = NULL; + hdlr->notationDecl = NULL; + hdlr->unparsedEntityDecl = NULL; + hdlr->setDocumentLocator = setDocumentLocator; + hdlr->startDocument = startDocument; + hdlr->endDocument = endDocument; + hdlr->startElement = startElement; + hdlr->endElement = endElement; + hdlr->reference = 
reference; + hdlr->characters = characters; + hdlr->cdataBlock = NULL; + hdlr->ignorableWhitespace = ignorableWhitespace; + hdlr->processingInstruction = NULL; + hdlr->comment = comment; + hdlr->warning = xmlParserWarning; + hdlr->error = xmlParserError; + hdlr->fatalError = xmlParserError; + + hdlr->initialized = 1; +} + +/** + * docbDefaultSAXHandlerInit: + * + * Initialize the default SAX handler + */ +void +docbDefaultSAXHandlerInit(void) +{ + initdocbDefaultSAXHandler(&docbDefaultSAXHandler); +} + +#endif /* LIBXML_DOCB_ENABLED */ diff --git a/bundle/libxml/c14n.c b/bundle/libxml/c14n.c new file mode 100644 index 0000000000..cd410ccf1f --- /dev/null +++ b/bundle/libxml/c14n.c @@ -0,0 +1,1930 @@ +/* + * "Canonical XML" implementation + * http://www.w3.org/TR/xml-c14n + * + * "Exclusive XML Canonicalization" implementation + * http://www.w3.org/TR/xml-exc-c14n + * + * See Copyright for the status of this software. + * + * Author: Aleksey Sanin <aleksey@aleksey.com> + */ +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_C14N_ENABLED + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#include <string.h> + +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/uri.h> +#include <libxml/xmlerror.h> +#include <libxml/globals.h> +#include <libxml/xpathInternals.h> +#include <libxml/c14n.h> + +/************************************************************************ + * * + * Some declaration better left private ATM * + * * + ************************************************************************/ + +typedef enum { + XMLC14N_BEFORE_DOCUMENT_ELEMENT = 0, + XMLC14N_INSIDE_DOCUMENT_ELEMENT = 1, + XMLC14N_AFTER_DOCUMENT_ELEMENT = 2 +} xmlC14NPosition; + +typedef struct _xmlC14NVisibleNsStack { + int nsCurEnd; /* number of nodes in the set */ + int nsPrevStart; /* the begginning of the stack for previous visible node */ + int nsPrevEnd; /* the end of the stack for previous visible node */ + int nsMax; /* size of the array as allocated */ + xmlNsPtr 
*nsTab; /* array of ns in no particular order */ + xmlNodePtr *nodeTab;/* array of nodes in no particular order */ +} xmlC14NVisibleNsStack, *xmlC14NVisibleNsStackPtr; + +typedef struct _xmlC14NCtx { + /* input parameters */ + xmlDocPtr doc; + xmlC14NIsVisibleCallback is_visible_callback; + void* user_data; + int with_comments; + xmlOutputBufferPtr buf; + + /* position in the XML document */ + xmlC14NPosition pos; + int parent_is_doc; + xmlC14NVisibleNsStackPtr ns_rendered; + + /* exclusive canonicalization */ + int exclusive; + xmlChar **inclusive_ns_prefixes; +} xmlC14NCtx, *xmlC14NCtxPtr; + +static xmlC14NVisibleNsStackPtr xmlC14NVisibleNsStackCreate (void); +static void xmlC14NVisibleNsStackDestroy (xmlC14NVisibleNsStackPtr cur); +static void xmlC14NVisibleNsStackAdd (xmlC14NVisibleNsStackPtr cur, + xmlNsPtr ns, + xmlNodePtr node); +static void xmlC14NVisibleNsStackSave (xmlC14NVisibleNsStackPtr cur, + xmlC14NVisibleNsStackPtr state); +static void xmlC14NVisibleNsStackRestore (xmlC14NVisibleNsStackPtr cur, + xmlC14NVisibleNsStackPtr state); +static void xmlC14NVisibleNsStackShift (xmlC14NVisibleNsStackPtr cur); +static int xmlC14NVisibleNsStackFind (xmlC14NVisibleNsStackPtr cur, + xmlNsPtr ns); +static int xmlExcC14NVisibleNsStackFind (xmlC14NVisibleNsStackPtr cur, + xmlNsPtr ns, + xmlC14NCtxPtr ctx); + +static int xmlC14NIsNodeInNodeset (xmlNodeSetPtr nodes, + xmlNodePtr node, + xmlNodePtr parent); + + + +static int xmlC14NProcessNode(xmlC14NCtxPtr ctx, xmlNodePtr cur); +static int xmlC14NProcessNodeList(xmlC14NCtxPtr ctx, xmlNodePtr cur); +typedef enum { + XMLC14N_NORMALIZE_ATTR = 0, + XMLC14N_NORMALIZE_COMMENT = 1, + XMLC14N_NORMALIZE_PI = 2, + XMLC14N_NORMALIZE_TEXT = 3 +} xmlC14NNormalizationMode; + +static xmlChar *xmlC11NNormalizeString(const xmlChar * input, + xmlC14NNormalizationMode mode); + +#define xmlC11NNormalizeAttr( a ) \ + xmlC11NNormalizeString((a), XMLC14N_NORMALIZE_ATTR) +#define xmlC11NNormalizeComment( a ) \ + xmlC11NNormalizeString((a), 
XMLC14N_NORMALIZE_COMMENT) +#define xmlC11NNormalizePI( a ) \ + xmlC11NNormalizeString((a), XMLC14N_NORMALIZE_PI) +#define xmlC11NNormalizeText( a ) \ + xmlC11NNormalizeString((a), XMLC14N_NORMALIZE_TEXT) + +#define xmlC14NIsVisible( ctx, node, parent ) \ + (((ctx)->is_visible_callback != NULL) ? \ + (ctx)->is_visible_callback((ctx)->user_data, \ + (xmlNodePtr)(node), (xmlNodePtr)(parent)) : 1) +/************************************************************************ + * * + * The implementation internals * + * * + ************************************************************************/ +#define XML_NAMESPACES_DEFAULT 16 + +static int +xmlC14NIsNodeInNodeset(xmlNodeSetPtr nodes, xmlNodePtr node, xmlNodePtr parent) { + if((nodes != NULL) && (node != NULL)) { + if(node->type != XML_NAMESPACE_DECL) { + return(xmlXPathNodeSetContains(nodes, node)); + } else { + xmlNs ns; + + memcpy(&ns, node, sizeof(ns)); + ns.next = (xmlNsPtr)parent; /* this is a libxml hack! check xpath.c for details */ + + /* + * If the input is an XPath node-set, then the node-set must explicitly + * contain every node to be rendered to the canonical form. 
+ */ + return(xmlXPathNodeSetContains(nodes, (xmlNodePtr)&ns)); + } + } + return(1); +} + +static xmlC14NVisibleNsStackPtr +xmlC14NVisibleNsStackCreate(void) { + xmlC14NVisibleNsStackPtr ret; + + ret = (xmlC14NVisibleNsStackPtr) xmlMalloc(sizeof(xmlC14NVisibleNsStack)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackCreate: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlC14NVisibleNsStack)); + return(ret); +} + +static void +xmlC14NVisibleNsStackDestroy(xmlC14NVisibleNsStackPtr cur) { + if(cur == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackDestroy: cur is null.\n"); +#endif + return; + } + if(cur->nsTab != NULL) { + memset(cur->nsTab, 0, cur->nsMax * sizeof(xmlNsPtr)); + xmlFree(cur->nsTab); + } + if(cur->nodeTab != NULL) { + memset(cur->nodeTab, 0, cur->nsMax * sizeof(xmlNodePtr)); + xmlFree(cur->nodeTab); + } + memset(cur, 0, sizeof(xmlC14NVisibleNsStack)); + xmlFree(cur); + +} + +static void +xmlC14NVisibleNsStackAdd(xmlC14NVisibleNsStackPtr cur, xmlNsPtr ns, xmlNodePtr node) { + if((cur == NULL) || + ((cur->nsTab == NULL) && (cur->nodeTab != NULL)) || + ((cur->nsTab != NULL) && (cur->nodeTab == NULL))) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackAdd: cur is null.\n"); +#endif + return; + } + + if ((cur->nsTab == NULL) && (cur->nodeTab == NULL)) { + cur->nsTab = (xmlNsPtr*) xmlMalloc(XML_NAMESPACES_DEFAULT * sizeof(xmlNsPtr)); + cur->nodeTab = (xmlNodePtr*) xmlMalloc(XML_NAMESPACES_DEFAULT * sizeof(xmlNodePtr)); + if ((cur->nsTab == NULL) || (cur->nodeTab == NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackAdd: out of memory\n"); + return; + } + memset(cur->nsTab, 0 , XML_NAMESPACES_DEFAULT * sizeof(xmlNsPtr)); + memset(cur->nodeTab, 0 , XML_NAMESPACES_DEFAULT * sizeof(xmlNodePtr)); + cur->nsMax = XML_NAMESPACES_DEFAULT; + } else if(cur->nsMax == cur->nsCurEnd) { + void *tmp; + 
int tmpSize; + + tmpSize = 2 * cur->nsMax; + tmp = xmlRealloc(cur->nsTab, tmpSize * sizeof(xmlNsPtr)); + if (tmp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackAdd: out of memory\n"); + return; + } + cur->nsTab = (xmlNsPtr*)tmp; + + tmp = xmlRealloc(cur->nodeTab, tmpSize * sizeof(xmlNodePtr)); + if (tmp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackAdd: out of memory\n"); + return; + } + cur->nodeTab = (xmlNodePtr*)tmp; + + cur->nsMax = tmpSize; + } + cur->nsTab[cur->nsCurEnd] = ns; + cur->nodeTab[cur->nsCurEnd] = node; + + ++cur->nsCurEnd; +} + +static void +xmlC14NVisibleNsStackSave(xmlC14NVisibleNsStackPtr cur, xmlC14NVisibleNsStackPtr state) { + if((cur == NULL) || (state == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackSave: cur or state is null.\n"); +#endif + return; + } + + state->nsCurEnd = cur->nsCurEnd; + state->nsPrevStart = cur->nsPrevStart; + state->nsPrevEnd = cur->nsPrevEnd; +} + +static void +xmlC14NVisibleNsStackRestore(xmlC14NVisibleNsStackPtr cur, xmlC14NVisibleNsStackPtr state) { + if((cur == NULL) || (state == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackRestore: cur or state is null.\n"); +#endif + return; + } + cur->nsCurEnd = state->nsCurEnd; + cur->nsPrevStart = state->nsPrevStart; + cur->nsPrevEnd = state->nsPrevEnd; +} + +static void +xmlC14NVisibleNsStackShift(xmlC14NVisibleNsStackPtr cur) { + if(cur == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackRestore: cur is null.\n"); +#endif + return; + } + cur->nsPrevStart = cur->nsPrevEnd; + cur->nsPrevEnd = cur->nsCurEnd; +} + +static int +xmlC14NStrEqual(const xmlChar *str1, const xmlChar *str2) { + if (str1 == str2) return(1); + if (str1 == NULL) return((*str2) == '\0'); + if (str2 == NULL) return((*str1) == '\0'); + do { + if (*str1++ != *str2) return(0); + } while (*str2++); + return(1); 
+} + +/** + * xmlC14NVisibleNsStackFind: + * @ctx: the C14N context + * @ns: the namespace to check + * + * Checks whether the given namespace was already rendered or not + * + * Returns 1 if we already wrote this namespace or 0 otherwise + */ +static int +xmlC14NVisibleNsStackFind(xmlC14NVisibleNsStackPtr cur, xmlNsPtr ns) +{ + int i; + const xmlChar *prefix; + const xmlChar *href; + int has_empty_ns; + + if(cur == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NVisibleNsStackFind: cur is null.\n"); +#endif + return (0); + } + + /* + * if the default namespace xmlns="" is not defined yet then + * we do not want to print it out + */ + prefix = ((ns == NULL) || (ns->prefix == NULL)) ? BAD_CAST "" : ns->prefix; + href = ((ns == NULL) || (ns->href == NULL)) ? BAD_CAST "" : ns->href; + has_empty_ns = (xmlC14NStrEqual(prefix, NULL) && xmlC14NStrEqual(href, NULL)); + + if (cur->nsTab != NULL) { + int start = (has_empty_ns) ? 0 : cur->nsPrevStart; + for (i = cur->nsCurEnd - 1; i >= start; --i) { + xmlNsPtr ns1 = cur->nsTab[i]; + + if(xmlC14NStrEqual(prefix, (ns1 != NULL) ? ns1->prefix : NULL)) { + return(xmlC14NStrEqual(href, (ns1 != NULL) ? ns1->href : NULL)); + } + } + } + return(has_empty_ns); +} + +static int +xmlExcC14NVisibleNsStackFind(xmlC14NVisibleNsStackPtr cur, xmlNsPtr ns, xmlC14NCtxPtr ctx) { + int i; + const xmlChar *prefix; + const xmlChar *href; + int has_empty_ns; + + if(cur == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlExcC14NVisibleNsStackFind: cur is null.\n"); +#endif + return (0); + } + + /* + * if the default namespace xmlns="" is not defined yet then + * we do not want to print it out + */ + prefix = ((ns == NULL) || (ns->prefix == NULL)) ? BAD_CAST "" : ns->prefix; + href = ((ns == NULL) || (ns->href == NULL)) ? 
BAD_CAST "" : ns->href; + has_empty_ns = (xmlC14NStrEqual(prefix, NULL) && xmlC14NStrEqual(href, NULL)); + + if (cur->nsTab != NULL) { + int start = 0; + for (i = cur->nsCurEnd - 1; i >= start; --i) { + xmlNsPtr ns1 = cur->nsTab[i]; + + if(xmlC14NStrEqual(prefix, (ns1 != NULL) ? ns1->prefix : NULL)) { + if(xmlC14NStrEqual(href, (ns1 != NULL) ? ns1->href : NULL)) { + return(xmlC14NIsVisible(ctx, ns1, cur->nodeTab[i])); + } else { + return(0); + } + } + } + } + return(has_empty_ns); +} + + + + +/** + * xmlC14NIsXmlNs: + * @ns: the namespace to check + * + * Checks whether the given namespace is a default "xml:" namespace + * with href="http://www.w3.org/XML/1998/namespace" + * + * Returns 1 if the node is default or 0 otherwise + */ + +/* todo: make it a define? */ +static int +xmlC14NIsXmlNs(xmlNsPtr ns) +{ + return ((ns != NULL) && + (xmlStrEqual(ns->prefix, BAD_CAST "xml")) && + (xmlStrEqual(ns->href, + BAD_CAST + "http://www.w3.org/XML/1998/namespace"))); +} + + +/** + * xmlC14NNsCompare: + * @ns1: the pointer to first namespace + * @ns2: the pointer to second namespace + * + * Compares the namespaces by names (prefixes). + * + * Returns -1 if ns1 < ns2, 0 if ns1 == ns2 or 1 if ns1 > ns2. + */ +static int +xmlC14NNsCompare(xmlNsPtr ns1, xmlNsPtr ns2) +{ + if (ns1 == ns2) + return (0); + if (ns1 == NULL) + return (-1); + if (ns2 == NULL) + return (1); + + return (xmlStrcmp(ns1->prefix, ns2->prefix)); +} + + +/** + * xmlC14NPrintNamespaces: + * @ns: the pointer to namespace + * @ctx: the C14N context + * + * Prints the given namespace to the output buffer from C14N context. + * + * Returns 1 on success or 0 on fail. 
+ */ +static int +xmlC14NPrintNamespaces(const xmlNsPtr ns, xmlC14NCtxPtr ctx) +{ + + if ((ns == NULL) || (ctx == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NPrintNamespace: namespace or context pointer is null\n"); +#endif + return 0; + } + + if (ns->prefix != NULL) { + xmlOutputBufferWriteString(ctx->buf, " xmlns:"); + xmlOutputBufferWriteString(ctx->buf, (const char *) ns->prefix); + xmlOutputBufferWriteString(ctx->buf, "=\""); + } else { + xmlOutputBufferWriteString(ctx->buf, " xmlns=\""); + } + if(ns->href != NULL) { + xmlOutputBufferWriteString(ctx->buf, (const char *) ns->href); + } + xmlOutputBufferWriteString(ctx->buf, "\""); + return (1); +} + +/** + * xmlC14NProcessNamespacesAxis: + * @ctx: the C14N context + * @node: the current node + * + * Prints out canonical namespace axis of the current node to the + * buffer from C14N context as follows + * + * Canonical XML v 1.0 (http://www.w3.org/TR/xml-c14n) + * + * Namespace Axis + * Consider a list L containing only namespace nodes in the + * axis and in the node-set in lexicographic order (ascending). To begin + * processing L, if the first node is not the default namespace node (a node + * with no namespace URI and no local name), then generate a space followed + * by xmlns="" if and only if the following conditions are met: + * - the element E that owns the axis is in the node-set + * - The nearest ancestor element of E in the node-set has a default + * namespace node in the node-set (default namespace nodes always + * have non-empty values in XPath) + * The latter condition eliminates unnecessary occurrences of xmlns="" in + * the canonical form since an element only receives an xmlns="" if its + * default namespace is empty and if it has an immediate parent in the + * canonical form that has a non-empty default namespace. 
To finish + * processing L, simply process every namespace node in L, except omit + * namespace node with local name xml, which defines the xml prefix, + * if its string value is http://www.w3.org/XML/1998/namespace. + * + * Exclusive XML Canonicalization v 1.0 (http://www.w3.org/TR/xml-exc-c14n) + * Canonical XML applied to a document subset requires the search of the + * ancestor nodes of each orphan element node for attributes in the xml + * namespace, such as xml:lang and xml:space. These are copied into the + * element node except if a declaration of the same attribute is already + * in the attribute axis of the element (whether or not it is included in + * the document subset). This search and copying are omitted from the + * Exclusive XML Canonicalization method. + * + * Returns 0 on success or -1 on fail. + */ +static int +xmlC14NProcessNamespacesAxis(xmlC14NCtxPtr ctx, xmlNodePtr cur, int visible) +{ + xmlNodePtr n; + xmlNsPtr ns, tmp; + xmlListPtr list; + int already_rendered; + int has_empty_ns = 0; + + if ((ctx == NULL) || (cur == NULL) || (cur->type != XML_ELEMENT_NODE)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNamespacesAxis: Null context or node pointer or type != XML_ELEMENT_NODE.\n"); +#endif + return (-1); + } + + /* + * Create a sorted list to store element namespaces + */ + list = xmlListCreate(NULL, (xmlListDataCompare) xmlC14NNsCompare); + if (list == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNamespacesAxis: list creation failed\n"); +#endif + return (-1); + } + + /* check all namespaces */ + for(n = cur; n != NULL; n = n->parent) { + for(ns = n->nsDef; ns != NULL; ns = ns->next) { + tmp = xmlSearchNs(cur->doc, cur, ns->prefix); + + if((tmp == ns) && !xmlC14NIsXmlNs(ns) && xmlC14NIsVisible(ctx, ns, cur)) { + already_rendered = xmlC14NVisibleNsStackFind(ctx->ns_rendered, ns); + if(visible) { + xmlC14NVisibleNsStackAdd(ctx->ns_rendered, ns, cur); + } + 
if(!already_rendered) { + xmlListInsert(list, ns); + } + if(xmlStrlen(ns->prefix) == 0) { + has_empty_ns = 1; + } + } + } + } + + /** + * if the first node is not the default namespace node (a node with no + * namespace URI and no local name), then generate a space followed by + * xmlns="" if and only if the following conditions are met: + * - the element E that owns the axis is in the node-set + * - the nearest ancestor element of E in the node-set has a default + * namespace node in the node-set (default namespace nodes always + * have non-empty values in XPath) + */ + if(visible && !has_empty_ns) { + static xmlNs ns_default; + + memset(&ns_default, 0, sizeof(ns_default)); + if(!xmlC14NVisibleNsStackFind(ctx->ns_rendered, &ns_default)) { + xmlC14NPrintNamespaces(&ns_default, ctx); + } + } + + + /* + * print out all elements from list + */ + xmlListWalk(list, (xmlListWalker) xmlC14NPrintNamespaces, (const void *) ctx); + + /* + * Cleanup + */ + xmlListDelete(list); + return (0); +} + + +/** + * xmlExcC14NProcessNamespacesAxis: + * @ctx: the C14N context + * @node: the current node + * + * Prints out exclusive canonical namespace axis of the current node to the + * buffer from C14N context as follows + * + * Exclusive XML Canonicalization + * http://www.w3.org/TR/xml-exc-c14n + * + * If the element node is in the XPath subset then output the node in + * accordance with Canonical XML except for namespace nodes which are + * rendered as follows: + * + * 1. Render each namespace node iff: + * * it is visibly utilized by the immediate parent element or one of + * its attributes, or is present in InclusiveNamespaces PrefixList, and + * * its prefix and value do not appear in ns_rendered. ns_rendered is + * obtained by popping the state stack in order to obtain a list of + * prefixes and their values which have already been rendered by + * an output ancestor of the namespace node's parent element. + * 2. 
Append the rendered namespace node to the list ns_rendered of namespace
 *    nodes rendered by output ancestors. Push ns_rendered on state stack and
 *    recurse.
 * 3. After the recursion returns, pop the state stack.
 *
 * The namespaces selected for output are gathered in a list ordered by
 * xmlC14NNsCompare and written with xmlC14NPrintNamespaces; an xmlns=""
 * declaration, when one is required, is written before that list.
 *
 * Returns 0 on success or -1 on fail.
 */
static int
xmlExcC14NProcessNamespacesAxis(xmlC14NCtxPtr ctx, xmlNodePtr cur, int visible)
{
    xmlNsPtr ns;
    xmlListPtr list;
    xmlAttrPtr attr;
    int already_rendered;
    /* set once any namespace with an empty prefix has been handled */
    int has_empty_ns = 0;
    /* set when the element or one of its attributes has no namespace */
    int has_visibly_utilized_empty_ns = 0;
    /* set when the inclusive prefix list names the default namespace */
    int has_empty_ns_in_inclusive_list = 0;

    if ((ctx == NULL) || (cur == NULL) || (cur->type != XML_ELEMENT_NODE)) {
#ifdef DEBUG_C14N
        xmlGenericError(xmlGenericErrorContext,
                        "xmlExcC14NProcessNamespacesAxis: Null context or node pointer or type != XML_ELEMENT_NODE.\n");
#endif
        return (-1);
    }

    /* this function only implements the exclusive variant */
    if(!ctx->exclusive) {
#ifdef DEBUG_C14N
        xmlGenericError(xmlGenericErrorContext,
                        "xmlExcC14NProcessNamespacesAxis: called for non-exclusive canonization or rendered stack is NULL.\n");
#endif
        return (-1);

    }

    /*
     * Create a sorted list to store element namespaces
     */
    list = xmlListCreate(NULL, (xmlListDataCompare) xmlC14NNsCompare);
    if (list == NULL) {
#ifdef DEBUG_C14N
        xmlGenericError(xmlGenericErrorContext,
                        "xmlExcC14NProcessNamespacesAxis: list creation failed\n");
#endif
        return (-1);
    }

    /*
     * process inclusive namespaces:
     * All namespace nodes appearing on inclusive ns list are
     * handled as provided in Canonical XML
     */
    if(ctx->inclusive_ns_prefixes != NULL) {
        xmlChar *prefix;
        int i;

        for (i = 0; ctx->inclusive_ns_prefixes[i] != NULL; ++i) {
            prefix = ctx->inclusive_ns_prefixes[i];
            /*
             * Special values for namespace with empty prefix:
             * "#default" and "" both select the default namespace,
             * which is looked up with a NULL prefix.
             */
            if (xmlStrEqual(prefix, BAD_CAST "#default")
                || xmlStrEqual(prefix, BAD_CAST "")) {
                prefix = NULL;
                has_empty_ns_in_inclusive_list = 1;
            }

            ns = xmlSearchNs(cur->doc, cur, prefix);
            if((ns != NULL) && !xmlC14NIsXmlNs(ns) && xmlC14NIsVisible(ctx, ns, cur)) {
                /* look the namespace up before recording it on the stack */
                already_rendered = xmlC14NVisibleNsStackFind(ctx->ns_rendered, ns);
                if(visible) {
                    xmlC14NVisibleNsStackAdd(ctx->ns_rendered, ns, cur);
                }
                if(!already_rendered) {
                    xmlListInsert(list, ns);
                }
                if(xmlStrlen(ns->prefix) == 0) {
                    has_empty_ns = 1;
                }
            }
        }
    }

    /*
     * add node namespace; an element without a namespace of its own is
     * treated as visibly utilizing the default namespace
     */
    if(cur->ns != NULL) {
        ns = cur->ns;
    } else {
        ns = xmlSearchNs(cur->doc, cur, NULL);
        has_visibly_utilized_empty_ns = 1;
    }
    if((ns != NULL) && !xmlC14NIsXmlNs(ns)) {
        if(visible && xmlC14NIsVisible(ctx, ns, cur)) {
            if(!xmlExcC14NVisibleNsStackFind(ctx->ns_rendered, ns, ctx)) {
                xmlListInsert(list, ns);
            }
        }
        if(visible) {
            xmlC14NVisibleNsStackAdd(ctx->ns_rendered, ns, cur);
        }
        if(xmlStrlen(ns->prefix) == 0) {
            has_empty_ns = 1;
        }
    }


    /* add attributes */
    for(attr = cur->properties; attr != NULL; attr = attr->next) {
        /*
         * we need to check that attribute is visible and has non
         * default namespace (XML Namespaces: "default namespaces
         * do not apply directly to attributes")
         */
        if((attr->ns != NULL) && xmlC14NIsVisible(ctx, attr, cur)) {
            already_rendered = xmlExcC14NVisibleNsStackFind(ctx->ns_rendered, attr->ns, ctx);
            /*
             * NOTE(review): unlike the element namespace above, the
             * attribute namespace is recorded on the stack even when
             * the element is not visible - confirm this is intended.
             */
            xmlC14NVisibleNsStackAdd(ctx->ns_rendered, attr->ns, (xmlNodePtr)attr);
            if(!already_rendered && visible) {
                xmlListInsert(list, attr->ns);
            }
            if(xmlStrlen(attr->ns->prefix) == 0) {
                has_empty_ns = 1;
            }
        } else if(attr->ns == NULL) {
            has_visibly_utilized_empty_ns = 1;
        }
    }

    /*
     * Process xmlns="": a zeroed xmlNs stands for the empty default
     * namespace; it is printed directly rather than inserted in the list.
     */
    if(visible && has_visibly_utilized_empty_ns &&
            !has_empty_ns && !has_empty_ns_in_inclusive_list) {
        static xmlNs ns_default;

        memset(&ns_default, 0, sizeof(ns_default));

        already_rendered = xmlExcC14NVisibleNsStackFind(ctx->ns_rendered, &ns_default, ctx);
        if(!already_rendered) {
            xmlC14NPrintNamespaces(&ns_default, ctx);
        }
    } else if(visible && !has_empty_ns && has_empty_ns_in_inclusive_list) {
        static xmlNs ns_default;

        memset(&ns_default, 0, sizeof(ns_default));
        if(!xmlC14NVisibleNsStackFind(ctx->ns_rendered, &ns_default)) {
            xmlC14NPrintNamespaces(&ns_default, ctx);
        }
    }



    /*
     * print out all elements from list
     */
    xmlListWalk(list, (xmlListWalker) xmlC14NPrintNamespaces, (const void *) ctx);

    /*
     * Cleanup
     */
    xmlListDelete(list);
    return (0);
}


/**
 * xmlC14NAttrsCompare:
 * @attr1:		the pointer to first attr
 * @attr2:		the pointer to second attr
 *
 * Compares two attributes to order them for output. Attributes that
 * share the same namespace pointer are ordered by name; attributes
 * without a namespace (or with a NULL prefix) come first; the remaining
 * ones are ordered by namespace href and then by name.
 *
 * Returns a negative value if attr1 < attr2, 0 if attr1 == attr2 or a
 * positive value if attr1 > attr2.
 */
static int
xmlC14NAttrsCompare(xmlAttrPtr attr1, xmlAttrPtr attr2)
{
    int ret = 0;

    /*
     * Simple cases
     */
    if (attr1 == attr2)
        return (0);
    if (attr1 == NULL)
        return (-1);
    if (attr2 == NULL)
        return (1);
    if (attr1->ns == attr2->ns) {
        return (xmlStrcmp(attr1->name, attr2->name));
    }

    /*
     * Attributes in the default namespace are first
     * because the default namespace is not applied to
     * unqualified attributes
     */
    if (attr1->ns == NULL)
        return (-1);
    if (attr2->ns == NULL)
        return (1);
    if (attr1->ns->prefix == NULL)
        return (-1);
    if (attr2->ns->prefix == NULL)
        return (1);

    /* both attributes are prefixed: namespace URI first, then local name */
    ret = xmlStrcmp(attr1->ns->href, attr2->ns->href);
    if (ret == 0) {
        ret = xmlStrcmp(attr1->name, attr2->name);
    }
    return (ret);
}


/**
 * xmlC14NPrintAttrs:
 * @attr:		the pointer to attr
 * @ctx: 		the C14N context
 *
 * Prints out the canonical form of the attribute to the
 * buffer from C14N context as follows
 *
 * Canonical XML v 1.0 (http://www.w3.org/TR/xml-c14n)
 *
 * Returns 1 on success or 0 on fail.
+ */ +static int +xmlC14NPrintAttrs(const xmlAttrPtr attr, xmlC14NCtxPtr ctx) +{ + xmlChar *value; + xmlChar *buffer; + + if ((attr == NULL) || (ctx == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NPrintAttrs: attr == NULL or ctx == NULL\n"); +#endif + return (0); + } + + xmlOutputBufferWriteString(ctx->buf, " "); + if (attr->ns != NULL && xmlStrlen(attr->ns->prefix) > 0) { + xmlOutputBufferWriteString(ctx->buf, + (const char *) attr->ns->prefix); + xmlOutputBufferWriteString(ctx->buf, ":"); + } + xmlOutputBufferWriteString(ctx->buf, (const char *) attr->name); + xmlOutputBufferWriteString(ctx->buf, "=\""); + + value = xmlNodeListGetString(attr->doc, attr->children, 1); + /* todo: should we log an error if value==NULL ? */ + if (value != NULL) { + buffer = xmlC11NNormalizeAttr(value); + xmlFree(value); + if (buffer != NULL) { + xmlOutputBufferWriteString(ctx->buf, (const char *) buffer); + xmlFree(buffer); + } else { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NPrintAttrs: xmlC11NNormalizeAttr failed\n"); +#endif + return (0); + } + } + xmlOutputBufferWriteString(ctx->buf, "\""); + return (1); +} + +/** + * xmlC14NProcessAttrsAxis: + * @ctx: the C14N context + * @cur: the current node + * + * Prints out canonical attribute axis of the current node to the + * buffer from C14N context as follows + * + * Canonical XML v 1.0 (http://www.w3.org/TR/xml-c14n) + * + * Attribute Axis + * In lexicographic order (ascending), process each node that + * is in the element's attribute axis and in the node-set. + * + * The processing of an element node E MUST be modified slightly + * when an XPath node-set is given as input and the element's + * parent is omitted from the node-set. 
+ * + * + * Exclusive XML Canonicalization v 1.0 (http://www.w3.org/TR/xml-exc-c14n) + * + * Canonical XML applied to a document subset requires the search of the + * ancestor nodes of each orphan element node for attributes in the xml + * namespace, such as xml:lang and xml:space. These are copied into the + * element node except if a declaration of the same attribute is already + * in the attribute axis of the element (whether or not it is included in + * the document subset). This search and copying are omitted from the + * Exclusive XML Canonicalization method. + * + * Returns 0 on success or -1 on fail. + */ +static int +xmlC14NProcessAttrsAxis(xmlC14NCtxPtr ctx, xmlNodePtr cur) +{ + xmlAttrPtr attr; + xmlListPtr list; + + if ((ctx == NULL) || (cur == NULL) || (cur->type != XML_ELEMENT_NODE)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessAttrsAxis: Null context or node pointer or type != XML_ELEMENT_NODE.\n"); +#endif + return (-1); + } + + /* + * Create a sorted list to store element attributes + */ + list = xmlListCreate(NULL, (xmlListDataCompare) xmlC14NAttrsCompare); + if (list == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessAttrsAxis: list creation failed\n"); +#endif + return (-1); + } + + /* + * Add all visible attributes from current node. + */ + attr = cur->properties; + while (attr != NULL) { + /* check that attribute is visible */ + if (xmlC14NIsVisible(ctx, attr, cur)) { + xmlListInsert(list, attr); + } + attr = attr->next; + } + + /* + * include attributes in "xml" namespace defined in ancestors + * (only for non-exclusive XML Canonicalization) + */ + if ((!ctx->exclusive) && (cur->parent != NULL) + && (!xmlC14NIsVisible(ctx, cur->parent, cur->parent->parent))) { + /* + * If XPath node-set is not specified then the parent is always + * visible! 
+ */ + cur = cur->parent; + while (cur != NULL) { + attr = cur->properties; + while (attr != NULL) { + if ((attr->ns != NULL) + && (xmlStrEqual(attr->ns->prefix, BAD_CAST "xml"))) { + if (xmlListSearch(list, attr) == NULL) { + xmlListInsert(list, attr); + } + } + attr = attr->next; + } + cur = cur->parent; + } + } + + /* + * print out all elements from list + */ + xmlListWalk(list, (xmlListWalker) xmlC14NPrintAttrs, (const void *) ctx); + + /* + * Cleanup + */ + xmlListDelete(list); + return (0); +} + +/** + * xmlC14NCheckForRelativeNamespaces: + * @ctx: the C14N context + * @cur: the current element node + * + * Checks that current element node has no relative namespaces defined + * + * Returns 0 if the node has no relative namespaces or -1 otherwise. + */ +static int +xmlC14NCheckForRelativeNamespaces(xmlC14NCtxPtr ctx, xmlNodePtr cur) +{ + xmlNsPtr ns; + + if ((ctx == NULL) || (cur == NULL) || (cur->type != XML_ELEMENT_NODE)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NCheckForRelativeNamespaces: Null context or node pointer or type != XML_ELEMENT_NODE.\n"); +#endif + return (-1); + } + + ns = cur->nsDef; + while (ns != NULL) { + if (xmlStrlen(ns->href) > 0) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *) ns->href); + if (uri == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NCheckForRelativeNamespaces: unable to parse uri=\"%s\".\n", + ns->href); +#endif + return (-1); + } + if (xmlStrlen((const xmlChar *) uri->scheme) == 0) { + xmlFreeURI(uri); + return (-1); + } + if ((!xmlStrEqual + ((const xmlChar *) uri->scheme, BAD_CAST "urn")) + && (xmlStrlen((const xmlChar *) uri->server) == 0)) { + xmlFreeURI(uri); + return (-1); + } + xmlFreeURI(uri); + } + ns = ns->next; + } + return (0); +} + +/** + * xmlC14NProcessElementNode: + * @ctx: the pointer to C14N context object + * @cur: the node to process + * + * Canonical XML v 1.0 (http://www.w3.org/TR/xml-c14n) + * + * Element Nodes + * If the 
element is not in the node-set, then the result is obtained + * by processing the namespace axis, then the attribute axis, then + * processing the child nodes of the element that are in the node-set + * (in document order). If the element is in the node-set, then the result + * is an open angle bracket (<), the element QName, the result of + * processing the namespace axis, the result of processing the attribute + * axis, a close angle bracket (>), the result of processing the child + * nodes of the element that are in the node-set (in document order), an + * open angle bracket, a forward slash (/), the element QName, and a close + * angle bracket. + * + * Returns non-negative value on success or negative value on fail + */ +static int +xmlC14NProcessElementNode(xmlC14NCtxPtr ctx, xmlNodePtr cur, int visible) +{ + int ret; + xmlC14NVisibleNsStack state; + int parent_is_doc = 0; + + if ((ctx == NULL) || (cur == NULL) || (cur->type != XML_ELEMENT_NODE)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessElementNode: Null context or node pointer or type != XML_ELEMENT_NODE.\n"); +#endif + return (-1); + } + + /* + * Check relative relative namespaces: + * implementations of XML canonicalization MUST report an operation + * failure on documents containing relative namespace URIs. 
+ */ + if (xmlC14NCheckForRelativeNamespaces(ctx, cur) < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessElementNode: xmlC14NCheckForRelativeNamespaces failed.\n"); +#endif + return (-1); + } + + + /* + * Save ns_rendered stack position + */ + xmlC14NVisibleNsStackSave(ctx->ns_rendered, &state); + + if (visible) { + if (ctx->parent_is_doc) { + /* save this flag into the stack */ + parent_is_doc = ctx->parent_is_doc; + ctx->parent_is_doc = 0; + ctx->pos = XMLC14N_INSIDE_DOCUMENT_ELEMENT; + } + xmlOutputBufferWriteString(ctx->buf, "<"); + + if ((cur->ns != NULL) && (xmlStrlen(cur->ns->prefix) > 0)) { + xmlOutputBufferWriteString(ctx->buf, + (const char *) cur->ns->prefix); + xmlOutputBufferWriteString(ctx->buf, ":"); + } + xmlOutputBufferWriteString(ctx->buf, (const char *) cur->name); + } + + if (!ctx->exclusive) { + ret = xmlC14NProcessNamespacesAxis(ctx, cur, visible); + } else { + ret = xmlExcC14NProcessNamespacesAxis(ctx, cur, visible); + } + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessElementNode: xmlC14NProcessNamespacesAxis failed.\n"); +#endif + return (-1); + } + /* todo: shouldn't this go to "visible only"? 
*/ + if(visible) { + xmlC14NVisibleNsStackShift(ctx->ns_rendered); + } + + if(visible) { + ret = xmlC14NProcessAttrsAxis(ctx, cur); + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessElementNode: xmlC14NProcessAttrsAxis failed.\n"); +#endif + return (-1); + } + } + + if (visible) { + xmlOutputBufferWriteString(ctx->buf, ">"); + } + if (cur->children != NULL) { + ret = xmlC14NProcessNodeList(ctx, cur->children); + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessElementNode: xmlC14NProcessNodeList failed.\n"); +#endif + return (-1); + } + } + if (visible) { + xmlOutputBufferWriteString(ctx->buf, "</"); + if ((cur->ns != NULL) && (xmlStrlen(cur->ns->prefix) > 0)) { + xmlOutputBufferWriteString(ctx->buf, + (const char *) cur->ns->prefix); + xmlOutputBufferWriteString(ctx->buf, ":"); + } + xmlOutputBufferWriteString(ctx->buf, (const char *) cur->name); + xmlOutputBufferWriteString(ctx->buf, ">"); + if (parent_is_doc) { + /* restore this flag from the stack for next node */ + ctx->parent_is_doc = parent_is_doc; + ctx->pos = XMLC14N_AFTER_DOCUMENT_ELEMENT; + } + } + + /* + * Restore ns_rendered stack position + */ + xmlC14NVisibleNsStackRestore(ctx->ns_rendered, &state); + return (0); +} + +/** + * xmlC14NProcessNode: + * @ctx: the pointer to C14N context object + * @cur: the node to process + * + * Processes the given node + * + * Returns non-negative value on success or negative value on fail + */ +static int +xmlC14NProcessNode(xmlC14NCtxPtr ctx, xmlNodePtr cur) +{ + int ret = 0; + int visible; + + if ((ctx == NULL) || (cur == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: Null context or node pointer.\n"); +#endif + return (-1); + } + + visible = xmlC14NIsVisible(ctx, cur, cur->parent); + switch (cur->type) { + case XML_ELEMENT_NODE: + ret = xmlC14NProcessElementNode(ctx, cur, visible); + break; + case XML_CDATA_SECTION_NODE: + case 
XML_TEXT_NODE: + /* + * Text Nodes + * the string value, except all ampersands are replaced + * by &, all open angle brackets (<) are replaced by <, all closing + * angle brackets (>) are replaced by >, and all #xD characters are + * replaced by 
. + */ + /* cdata sections are processed as text nodes */ + /* todo: verify that cdata sections are included in XPath nodes set */ + if ((visible) && (cur->content != NULL)) { + xmlChar *buffer; + + buffer = xmlC11NNormalizeText(cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(ctx->buf, + (const char *) buffer); + xmlFree(buffer); + } else { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: xmlC11NNormalizeText() failed\n"); +#endif + return (-1); + } + } + break; + case XML_PI_NODE: + /* + * Processing Instruction (PI) Nodes- + * The opening PI symbol (<?), the PI target name of the node, + * a leading space and the string value if it is not empty, and + * the closing PI symbol (?>). If the string value is empty, + * then the leading space is not added. Also, a trailing #xA is + * rendered after the closing PI symbol for PI children of the + * root node with a lesser document order than the document + * element, and a leading #xA is rendered before the opening PI + * symbol of PI children of the root node with a greater document + * order than the document element. + */ + if (visible) { + if (ctx->pos == XMLC14N_AFTER_DOCUMENT_ELEMENT) { + xmlOutputBufferWriteString(ctx->buf, "\x0A<?"); + } else { + xmlOutputBufferWriteString(ctx->buf, "<?"); + } + + xmlOutputBufferWriteString(ctx->buf, + (const char *) cur->name); + if ((cur->content != NULL) && (*(cur->content) != '\0')) { + xmlChar *buffer; + + xmlOutputBufferWriteString(ctx->buf, " "); + + /* todo: do we need to normalize pi? 
*/ + buffer = xmlC11NNormalizePI(cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(ctx->buf, + (const char *) buffer); + xmlFree(buffer); + } else { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: xmlC11NNormalizePI() failed\n"); +#endif + return (-1); + } + } + + if (ctx->pos == XMLC14N_BEFORE_DOCUMENT_ELEMENT) { + xmlOutputBufferWriteString(ctx->buf, "?>\x0A"); + } else { + xmlOutputBufferWriteString(ctx->buf, "?>"); + } + } + break; + case XML_COMMENT_NODE: + /* + * Comment Nodes + * Nothing if generating canonical XML without comments. For + * canonical XML with comments, generate the opening comment + * symbol (<!--), the string value of the node, and the + * closing comment symbol (-->). Also, a trailing #xA is rendered + * after the closing comment symbol for comment children of the + * root node with a lesser document order than the document + * element, and a leading #xA is rendered before the opening + * comment symbol of comment children of the root node with a + * greater document order than the document element. (Comment + * children of the root node represent comments outside of the + * top-level document element and outside of the document type + * declaration). + */ + if (visible && ctx->with_comments) { + if (ctx->pos == XMLC14N_AFTER_DOCUMENT_ELEMENT) { + xmlOutputBufferWriteString(ctx->buf, "\x0A<!--"); + } else { + xmlOutputBufferWriteString(ctx->buf, "<!--"); + } + + if (cur->content != NULL) { + xmlChar *buffer; + + /* todo: do we need to normalize comment? 
*/ + buffer = xmlC11NNormalizeComment(cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(ctx->buf, + (const char *) buffer); + xmlFree(buffer); + } else { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: xmlC11NNormalizeComment() failed\n"); +#endif + return (-1); + } + } + + if (ctx->pos == XMLC14N_BEFORE_DOCUMENT_ELEMENT) { + xmlOutputBufferWriteString(ctx->buf, "-->\x0A"); + } else { + xmlOutputBufferWriteString(ctx->buf, "-->"); + } + } + break; + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_FRAG_NODE: /* should be processed as document? */ +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: /* should be processed as document? */ +#endif +#ifdef LIBXML_HTML_ENABLED + case XML_HTML_DOCUMENT_NODE: /* should be processed as document? */ +#endif + if (cur->children != NULL) { + ctx->pos = XMLC14N_BEFORE_DOCUMENT_ELEMENT; + ctx->parent_is_doc = 1; + ret = xmlC14NProcessNodeList(ctx, cur->children); + } + break; + + case XML_ATTRIBUTE_NODE: + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: XML_ATTRIBUTE_NODE is illegal here\n"); + return (-1); + case XML_NAMESPACE_DECL: + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: XML_NAMESPACE_DECL is illegal here\n"); + return (-1); + case XML_ENTITY_REF_NODE: + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: XML_ENTITY_REF_NODE is illegal here\n"); + return (-1); + case XML_ENTITY_NODE: + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: XML_ENTITY_NODE is illegal here\n"); + return (-1); + + case XML_DOCUMENT_TYPE_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: +#ifdef LIBXML_XINCLUDE_ENABLED + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#endif + /* + * should be ignored according to "W3C Canonical XML" + */ + break; + default: +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNode: unknown node 
type = %d\n", + cur->type); +#endif + return (-1); + } + + return (ret); +} + +/** + * xmlC14NProcessNodeList: + * @ctx: the pointer to C14N context object + * @cur: the node to start from + * + * Processes all nodes in the row starting from cur. + * + * Returns non-negative value on success or negative value on fail + */ +static int +xmlC14NProcessNodeList(xmlC14NCtxPtr ctx, xmlNodePtr cur) +{ + int ret; + + if (ctx == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NProcessNodeList: Null context pointer.\n"); +#endif + return (-1); + } + + for (ret = 0; cur != NULL && ret >= 0; cur = cur->next) { + ret = xmlC14NProcessNode(ctx, cur); + } + return (ret); +} + + +/** + * xmlC14NFreeCtx: + * @ctx: the pointer to C14N context object + * + * Cleanups the C14N context object. + */ + +static void +xmlC14NFreeCtx(xmlC14NCtxPtr ctx) +{ + if (ctx == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NFreeCtx: ctx == NULL\n"); +#endif + return; + } + + if (ctx->ns_rendered != NULL) { + xmlC14NVisibleNsStackDestroy(ctx->ns_rendered); + } + xmlFree(ctx); +} + +/** + * xmlC14NNewCtx: + * @doc: the XML document for canonization + * @is_visible_callback:the function to use to determine is node visible + * or not + * @user_data: the first parameter for @is_visible_callback function + * (in most cases, it is nodes set) + * @inclusive_ns_prefixe the list of inclusive namespace prefixes + * ended with a NULL or NULL if there is no + * inclusive namespaces (only for exclusive + * canonicalization) + * @with_comments: include comments in the result (!=0) or not (==0) + * @buf: the output buffer to store canonical XML; this + * buffer MUST have encoder==NULL because C14N requires + * UTF-8 output + * + * Creates new C14N context object to store C14N parameters. 
+ * + * Returns pointer to newly created object (success) or NULL (fail) + */ +static xmlC14NCtxPtr +xmlC14NNewCtx(xmlDocPtr doc, + xmlC14NIsVisibleCallback is_visible_callback, void* user_data, + int exclusive, xmlChar ** inclusive_ns_prefixes, + int with_comments, xmlOutputBufferPtr buf) +{ + xmlC14NCtxPtr ctx; + + if ((doc == NULL) || (buf == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NNewCtx: pointer to document or output buffer is NULL\n"); +#endif + return (NULL); + } + + /* + * Validate the encoding output buffer encoding + */ + if (buf->encoder != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NNewCtx: output buffer encoder != NULL but C14N requires UTF8 output\n"); + return (NULL); + } + + /* + * Validate the XML document encoding value, if provided. + */ + if (doc->charset != XML_CHAR_ENCODING_UTF8) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NNewCtx: source document not in UTF8\n"); + return (NULL); + } + + /* + * Allocate a new xmlC14NCtxPtr and fill the fields. 
+ */ + ctx = (xmlC14NCtxPtr) xmlMalloc(sizeof(xmlC14NCtx)); + if (ctx == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NNewCtx: malloc failed\n"); + return (NULL); + } + memset(ctx, 0, sizeof(xmlC14NCtx)); + + /* + * initialize C14N context + */ + ctx->doc = doc; + ctx->with_comments = with_comments; + ctx->is_visible_callback = is_visible_callback; + ctx->user_data = user_data; + ctx->buf = buf; + ctx->parent_is_doc = 1; + ctx->pos = XMLC14N_BEFORE_DOCUMENT_ELEMENT; + ctx->ns_rendered = xmlC14NVisibleNsStackCreate(); + + if(ctx->ns_rendered == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NNewCtx: xmlC14NVisibleNsStackCreate failed\n"); + xmlC14NFreeCtx(ctx); + return (NULL); + } + + /* + * Set "exclusive" flag, create a nodes set for namespaces + * stack and remember list of incluseve prefixes + */ + if (exclusive) { + ctx->exclusive = 1; + ctx->inclusive_ns_prefixes = inclusive_ns_prefixes; + } + return (ctx); +} + +/** + * xmlC14NExecute: + * @doc: the XML document for canonization + * @is_visible_callback:the function to use to determine is node visible + * or not + * @user_data: the first parameter for @is_visible_callback function + * (in most cases, it is nodes set) + * @exclusive: the exclusive flag (0 - non-exclusive canonicalization; + * otherwise - exclusive canonicalization) + * @inclusive_ns_prefixes: the list of inclusive namespace prefixes + * ended with a NULL or NULL if there is no + * inclusive namespaces (only for exclusive + * canonicalization, ignored otherwise) + * @with_comments: include comments in the result (!=0) or not (==0) + * @buf: the output buffer to store canonical XML; this + * buffer MUST have encoder==NULL because C14N requires + * UTF-8 output + * + * Dumps the canonized image of given XML document into the provided buffer. 
+ * For details see "Canonical XML" (http://www.w3.org/TR/xml-c14n) or + * "Exclusive XML Canonicalization" (http://www.w3.org/TR/xml-exc-c14n) + * + * Returns non-negative value on success or a negative value on fail + */ +int +xmlC14NExecute(xmlDocPtr doc, xmlC14NIsVisibleCallback is_visible_callback, + void* user_data, int exclusive, xmlChar **inclusive_ns_prefixes, + int with_comments, xmlOutputBufferPtr buf) { + + xmlC14NCtxPtr ctx; + int ret; + + if ((buf == NULL) || (doc == NULL)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NExecute: null return buffer or doc pointer\n"); +#endif + return (-1); + } + + /* + * Validate the encoding output buffer encoding + */ + if (buf->encoder != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NExecute: output buffer encoder != NULL but C14N requires UTF8 output\n"); + return (-1); + } + + ctx = xmlC14NNewCtx(doc, is_visible_callback, user_data, + exclusive, inclusive_ns_prefixes, + with_comments, buf); + if (ctx == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlC14NExecute: unable to create C14N context\n"); + return (-1); + } + + + + /* + * Root Node + * The root node is the parent of the top-level document element. The + * result of processing each of its child nodes that is in the node-set + * in document order. The root node does not generate a byte order mark, + * XML declaration, nor anything from within the document type + * declaration. 
+ */ + if (doc->children != NULL) { + ret = xmlC14NProcessNodeList(ctx, doc->children); + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NExecute: process childrens' list failed.\n"); +#endif + xmlC14NFreeCtx(ctx); + return (-1); + } + } + + /* + * Flush buffer to get number of bytes written + */ + ret = xmlOutputBufferFlush(buf); + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NExecute: buffer flush failed.\n"); +#endif + xmlC14NFreeCtx(ctx); + return (-1); + } + + /* + * Cleanup + */ + xmlC14NFreeCtx(ctx); + return (ret); +} + +/** + * xmlC14NDocSaveTo: + * @doc: the XML document for canonization + * @nodes: the nodes set to be included in the canonized image + * or NULL if all document nodes should be included + * @exclusive: the exclusive flag (0 - non-exclusive canonicalization; + * otherwise - exclusive canonicalization) + * @inclusive_ns_prefixes: the list of inclusive namespace prefixes + * ended with a NULL or NULL if there is no + * inclusive namespaces (only for exclusive + * canonicalization, ignored otherwise) + * @with_comments: include comments in the result (!=0) or not (==0) + * @buf: the output buffer to store canonical XML; this + * buffer MUST have encoder==NULL because C14N requires + * UTF-8 output + * + * Dumps the canonized image of given XML document into the provided buffer. 
+ * For details see "Canonical XML" (http://www.w3.org/TR/xml-c14n) or + * "Exclusive XML Canonicalization" (http://www.w3.org/TR/xml-exc-c14n) + * + * Returns non-negative value on success or a negative value on fail + */ +int +xmlC14NDocSaveTo(xmlDocPtr doc, xmlNodeSetPtr nodes, + int exclusive, xmlChar ** inclusive_ns_prefixes, + int with_comments, xmlOutputBufferPtr buf) { + return(xmlC14NExecute(doc, + (xmlC14NIsVisibleCallback)xmlC14NIsNodeInNodeset, + nodes, + exclusive, + inclusive_ns_prefixes, + with_comments, + buf)); +} + + +/** + * xmlC14NDocDumpMemory: + * @doc: the XML document for canonization + * @nodes: the nodes set to be included in the canonized image + * or NULL if all document nodes should be included + * @exclusive: the exclusive flag (0 - non-exclusive canonicalization; + * otherwise - exclusive canonicalization) + * @inclusive_ns_prefixes: the list of inclusive namespace prefixes + * ended with a NULL or NULL if there is no + * inclusive namespaces (only for exclusive + * canonicalization, ignored otherwise) + * @with_comments: include comments in the result (!=0) or not (==0) + * @doc_txt_ptr: the memory pointer for allocated canonical XML text; + * the caller of this functions is responsible for calling + * xmlFree() to free allocated memory + * + * Dumps the canonized image of given XML document into memory. 
+ * For details see "Canonical XML" (http://www.w3.org/TR/xml-c14n) or + * "Exclusive XML Canonicalization" (http://www.w3.org/TR/xml-exc-c14n) + * + * Returns the number of bytes written on success or a negative value on fail + */ +int +xmlC14NDocDumpMemory(xmlDocPtr doc, xmlNodeSetPtr nodes, + int exclusive, xmlChar ** inclusive_ns_prefixes, + int with_comments, xmlChar ** doc_txt_ptr) +{ + int ret; + xmlOutputBufferPtr buf; + + if (doc_txt_ptr == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocDumpMemory: null return buffer pointer\n"); +#endif + return (-1); + } + + *doc_txt_ptr = NULL; + + /* + * create memory buffer with UTF8 (default) encoding + */ + buf = xmlAllocOutputBuffer(NULL); + if (buf == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocDumpMemory: failed to allocate output buffer.\n"); +#endif + return (-1); + } + + /* + * canonize document and write to buffer + */ + ret = xmlC14NDocSaveTo(doc, nodes, exclusive, inclusive_ns_prefixes, + with_comments, buf); + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocDumpMemory: xmlC14NDocSaveTo failed.\n"); +#endif + (void) xmlOutputBufferClose(buf); + return (-1); + } + + ret = buf->buffer->use; + if (ret > 0) { + *doc_txt_ptr = xmlStrndup(buf->buffer->content, ret); + } + (void) xmlOutputBufferClose(buf); + + if ((*doc_txt_ptr == NULL) && (ret > 0)) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocDumpMemory: failed to allocate memory for document text representation\n"); +#endif + return (-1); + } + return (ret); +} + +/** + * xmlC14NDocSave: + * @doc: the XML document for canonization + * @nodes: the nodes set to be included in the canonized image + * or NULL if all document nodes should be included + * @exclusive: the exclusive flag (0 - non-exclusive canonicalization; + * otherwise - exclusive canonicalization) + * @inclusive_ns_prefixes: the list of inclusive namespace 
prefixes + * ended with a NULL or NULL if there is no + * inclusive namespaces (only for exclusive + * canonicalization, ignored otherwise) + * @with_comments: include comments in the result (!=0) or not (==0) + * @filename: the filename to store canonical XML image + * @compression: the compression level (zlib requred): + * -1 - libxml default, + * 0 - uncompressed, + * >0 - compression level + * + * Dumps the canonized image of given XML document into the file. + * For details see "Canonical XML" (http://www.w3.org/TR/xml-c14n) or + * "Exclusive XML Canonicalization" (http://www.w3.org/TR/xml-exc-c14n) + * + * Returns the number of bytes written success or a negative value on fail + */ +int +xmlC14NDocSave(xmlDocPtr doc, xmlNodeSetPtr nodes, + int exclusive, xmlChar ** inclusive_ns_prefixes, + int with_comments, const char *filename, int compression) +{ + xmlOutputBufferPtr buf; + int ret; + + if (filename == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocSave: filename is NULL\n"); +#endif + return (-1); + } +#ifdef HAVE_ZLIB_H + if (compression < 0) + compression = xmlGetCompressMode(); +#endif + + /* + * save the content to a temp buffer, use default UTF8 encoding. 
+ */ + buf = xmlOutputBufferCreateFilename(filename, NULL, compression); + if (buf == NULL) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocSave: unable to create buffer for file=\"%s\" with compressin=%d\n", + filename, compression); +#endif + return (-1); + } + + /* + * canonize document and write to buffer + */ + ret = xmlC14NDocSaveTo(doc, nodes, exclusive, inclusive_ns_prefixes, + with_comments, buf); + if (ret < 0) { +#ifdef DEBUG_C14N + xmlGenericError(xmlGenericErrorContext, + "xmlC14NDocSave: xmlC14NDocSaveTo failed.\n"); +#endif + (void) xmlOutputBufferClose(buf); + return (-1); + } + + /* + * get the numbers of bytes written + */ + ret = xmlOutputBufferClose(buf); + return (ret); +} + + + +/* + * Macro used to grow the current buffer. + */ +#define growBufferReentrant() { \ + buffer_size *= 2; \ + buffer = (xmlChar *) \ + xmlRealloc(buffer, buffer_size * sizeof(xmlChar)); \ + if (buffer == NULL) { \ + xmlGenericError(xmlGenericErrorContext, "realloc failed"); \ + return(NULL); \ + } \ +} + +/** + * xmlC11NNormalizeString: + * @input: the input string + * @mode: the normalization mode (attribute, comment, PI or text) + * + * Converts a string to a canonical (normalized) format. The code is stolen + * from xmlEncodeEntitiesReentrant(). Added normalization of \x09, \x0a, \x0A + * and the @mode parameter + * + * Returns a normalized string (caller is responsible for calling xmlFree()) + * or NULL if an error occurs + */ +static xmlChar * +xmlC11NNormalizeString(const xmlChar * input, + xmlC14NNormalizationMode mode) +{ + const xmlChar *cur = input; + xmlChar *buffer = NULL; + xmlChar *out = NULL; + int buffer_size = 0; + + if (input == NULL) + return (NULL); + + /* + * allocate an translation buffer. 
+ */ + buffer_size = 1000; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed"); + return (NULL); + } + out = buffer; + + while (*cur != '\0') { + if ((out - buffer) > (buffer_size - 10)) { + int indx = out - buffer; + + growBufferReentrant(); + out = &buffer[indx]; + } + + if ((*cur == '<') && ((mode == XMLC14N_NORMALIZE_ATTR) || + (mode == XMLC14N_NORMALIZE_TEXT))) { + *out++ = '&'; + *out++ = 'l'; + *out++ = 't'; + *out++ = ';'; + } else if ((*cur == '>') && (mode == XMLC14N_NORMALIZE_TEXT)) { + *out++ = '&'; + *out++ = 'g'; + *out++ = 't'; + *out++ = ';'; + } else if ((*cur == '&') && ((mode == XMLC14N_NORMALIZE_ATTR) || + (mode == XMLC14N_NORMALIZE_TEXT))) { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'm'; + *out++ = 'p'; + *out++ = ';'; + } else if ((*cur == '"') && (mode == XMLC14N_NORMALIZE_ATTR)) { + *out++ = '&'; + *out++ = 'q'; + *out++ = 'u'; + *out++ = 'o'; + *out++ = 't'; + *out++ = ';'; + } else if ((*cur == '\x09') && (mode == XMLC14N_NORMALIZE_ATTR)) { + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + *out++ = '9'; + *out++ = ';'; + } else if ((*cur == '\x0A') && (mode == XMLC14N_NORMALIZE_ATTR)) { + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + *out++ = 'A'; + *out++ = ';'; + } else if ((*cur == '\x0D') && ((mode == XMLC14N_NORMALIZE_ATTR) || + (mode == XMLC14N_NORMALIZE_TEXT) || + (mode == XMLC14N_NORMALIZE_COMMENT) || + (mode == XMLC14N_NORMALIZE_PI))) { + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + *out++ = 'D'; + *out++ = ';'; + } else { + /* + * Works because on UTF-8, all extended sequences cannot + * result in bytes in the ASCII range. 
+ */ + *out++ = *cur; + } + cur++; + } + *out++ = 0; + return (buffer); +} +#endif /* LIBXML_C14N_ENABLED */ diff --git a/bundle/libxml/catalog.c b/bundle/libxml/catalog.c new file mode 100644 index 0000000000..e0159e2b9f --- /dev/null +++ b/bundle/libxml/catalog.c @@ -0,0 +1,3502 @@ +/** + * catalog.c: set of generic Catalog related routines + * + * Reference: SGML Open Technical Resolution TR9401:1997. + * http://www.jclark.com/sp/catalog.htm + * + * XML Catalogs Working Draft 06 August 2001 + * http://www.oasis-open.org/committees/entity/spec-2001-08-06.html + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@imag.fr + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_CATALOG_ENABLED +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/hash.h> +#include <libxml/uri.h> +#include <libxml/parserInternals.h> +#include <libxml/catalog.h> +#include <libxml/xmlerror.h> +#include <libxml/threads.h> +#include <libxml/globals.h> + +#define MAX_DELEGATE 50 + +/** + * TODO: + * + * macro to flag unimplemented blocks + */ +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define XML_URN_PUBID "urn:publicid:" +#define XML_CATAL_BREAK ((xmlChar *) -1) +#ifndef XML_XML_DEFAULT_CATALOG +#define XML_XML_DEFAULT_CATALOG "file:///etc/xml/catalog" +#endif +#ifndef XML_SGML_DEFAULT_CATALOG +#define XML_SGML_DEFAULT_CATALOG "file:///etc/sgml/catalog" +#endif + +static int xmlExpandCatalog(xmlCatalogPtr catal, const char *filename); + +/************************************************************************ + * * + * Types, all private * + * * + 
 ************************************************************************/

/*
 * Kind of a catalog entry. The XML_CATA_* values are the ones produced
 * by the XML catalog parser, the SGML_CATA_* values are the ones handled
 * by the SGML serializer and converter in this file.
 */
typedef enum {
    XML_CATA_REMOVED = -1,
    XML_CATA_NONE = 0,
    XML_CATA_CATALOG,
    XML_CATA_BROKEN_CATALOG,
    XML_CATA_NEXT_CATALOG,
    XML_CATA_PUBLIC,
    XML_CATA_SYSTEM,
    XML_CATA_REWRITE_SYSTEM,
    XML_CATA_DELEGATE_PUBLIC,
    XML_CATA_DELEGATE_SYSTEM,
    XML_CATA_URI,
    XML_CATA_REWRITE_URI,
    XML_CATA_DELEGATE_URI,
    SGML_CATA_SYSTEM,
    SGML_CATA_PUBLIC,
    SGML_CATA_ENTITY,
    SGML_CATA_PENTITY,
    SGML_CATA_DOCTYPE,
    SGML_CATA_LINKTYPE,
    SGML_CATA_NOTATION,
    SGML_CATA_DELEGATE,
    SGML_CATA_BASE,
    SGML_CATA_CATALOG,
    SGML_CATA_DOCUMENT,
    SGML_CATA_SGMLDECL
} xmlCatalogEntryType;

/*
 * One catalog entry. Entries are linked into a tree through the
 * next/parent/children pointers.
 */
typedef struct _xmlCatalogEntry xmlCatalogEntry;
typedef xmlCatalogEntry *xmlCatalogEntryPtr;
struct _xmlCatalogEntry {
    struct _xmlCatalogEntry *next;      /* next sibling in the list */
    struct _xmlCatalogEntry *parent;    /* entry this one is attached to */
    struct _xmlCatalogEntry *children;  /* first entry of the child list */
    xmlCatalogEntryType type;           /* kind of entry */
    xmlChar *name;                      /* copy owned by the entry, or NULL */
    xmlChar *value;                     /* copy owned by the entry, or NULL */
    xmlChar *URL;  /* The expanded URL using the base */
    xmlCatalogPrefer prefer;            /* PUBLIC vs. SYSTEM preference */
    int dealloc;    /* when 1, xmlFreeCatalogEntry() leaves the entry alone */
};

/*
 * The two catalog flavours a struct _xmlCatalog can hold.
 */
typedef enum {
    XML_XML_CATALOG_TYPE = 1,
    XML_SGML_CATALOG_TYPE
} xmlCatalogType;

#define XML_MAX_SGML_CATA_DEPTH 10
struct _xmlCatalog {
    xmlCatalogType type;        /* either XML or SGML */

    /*
     * SGML Catalogs are stored as a simple hash table of catalog entries
     * Catalog stack to check against overflows when building the
     * SGML catalog
     */
    char *catalTab[XML_MAX_SGML_CATA_DEPTH];    /* stack of catals */
    int          catalNr;       /* Number of current catal streams */
    int          catalMax;      /* Max number of catal streams */
    xmlHashTablePtr sgml;       /* hash of the SGML entries */

    /*
     * XML Catalogs are stored as a tree of Catalog entries
     */
    xmlCatalogPrefer prefer;
    xmlCatalogEntryPtr xml;
};

/************************************************************************
 *                                                                      *
 *                      Global variables                                *
 *                                                                      *
 ************************************************************************/

/*
 * Those are preferences
 */
static int xmlDebugCatalogs = 0;
/* used for debugging */ +static xmlCatalogAllow xmlCatalogDefaultAllow = XML_CATA_ALLOW_ALL; +static xmlCatalogPrefer xmlCatalogDefaultPrefer = XML_CATA_PREFER_PUBLIC; + +/* + * Hash table containing all the trees of XML catalogs parsed by + * the application. + */ +static xmlHashTablePtr xmlCatalogXMLFiles = NULL; + +/* + * The default catalog in use by the application + */ +static xmlCatalogPtr xmlDefaultCatalog = NULL; + +/* + * A mutex for modifying the shared global catalog(s) + * xmlDefaultCatalog tree. + * It also protects xmlCatalogXMLFiles + * The core of this readers/writer scheme is in xmlFetchXMLCatalogFile() + */ +static xmlRMutexPtr xmlCatalogMutex = NULL; + +/* + * Whether the catalog support was initialized. + */ +static int xmlCatalogInitialized = 0; + + +/************************************************************************ + * * + * Allocation and Freeing * + * * + ************************************************************************/ + +/** + * xmlNewCatalogEntry: + * @type: type of entry + * @name: name of the entry + * @value: value of the entry + * @prefer: the PUBLIC vs. SYSTEM current preference value + * + * create a new Catalog entry, this type is shared both by XML and + * SGML catalogs, but the acceptable types values differs. 
+ * + * Returns the xmlCatalogEntryPtr or NULL in case of error + */ +static xmlCatalogEntryPtr +xmlNewCatalogEntry(xmlCatalogEntryType type, const xmlChar *name, + const xmlChar *value, const xmlChar *URL, xmlCatalogPrefer prefer) { + xmlCatalogEntryPtr ret; + + ret = (xmlCatalogEntryPtr) xmlMalloc(sizeof(xmlCatalogEntry)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", sizeof(xmlCatalogEntry)); + return(NULL); + } + ret->next = NULL; + ret->parent = NULL; + ret->children = NULL; + ret->type = type; + if (name != NULL) + ret->name = xmlStrdup(name); + else + ret->name = NULL; + if (value != NULL) + ret->value = xmlStrdup(value); + else + ret->value = NULL; + if (URL == NULL) + URL = value; + if (URL != NULL) + ret->URL = xmlStrdup(URL); + else + ret->URL = NULL; + ret->prefer = prefer; + ret->dealloc = 0; + return(ret); +} + +static void +xmlFreeCatalogEntryList(xmlCatalogEntryPtr ret); + +/** + * xmlFreeCatalogEntry: + * @ret: a Catalog entry + * + * Free the memory allocated to a Catalog entry + */ +static void +xmlFreeCatalogEntry(xmlCatalogEntryPtr ret) { + if (ret == NULL) + return; + /* + * Entries stored in the file hash must be deallocated + * only by the file hash cleaner ! 
+ */ + if (ret->dealloc == 1) + return; + + if (xmlDebugCatalogs) { + if (ret->name != NULL) + xmlGenericError(xmlGenericErrorContext, + "Free catalog entry %s\n", ret->name); + else if (ret->value != NULL) + xmlGenericError(xmlGenericErrorContext, + "Free catalog entry %s\n", ret->value); + else + xmlGenericError(xmlGenericErrorContext, + "Free catalog entry\n"); + } + + if (ret->name != NULL) + xmlFree(ret->name); + if (ret->value != NULL) + xmlFree(ret->value); + if (ret->URL != NULL) + xmlFree(ret->URL); + xmlFree(ret); +} + +/** + * xmlFreeCatalogEntryList: + * @ret: a Catalog entry list + * + * Free the memory allocated to a full chained list of Catalog entries + */ +static void +xmlFreeCatalogEntryList(xmlCatalogEntryPtr ret) { + xmlCatalogEntryPtr next; + + while (ret != NULL) { + next = ret->next; + xmlFreeCatalogEntry(ret); + ret = next; + } +} + +/** + * xmlFreeCatalogHashEntryList: + * @ret: a Catalog entry list + * + * Free the memory allocated to list of Catalog entries from the + * catalog file hash. + */ +static void +xmlFreeCatalogHashEntryList(xmlCatalogEntryPtr catal) { + xmlCatalogEntryPtr children, next; + + if (catal == NULL) + return; + + children = catal->children; + while (children != NULL) { + next = children->next; + children->dealloc = 0; + children->children = NULL; + xmlFreeCatalogEntry(children); + children = next; + } + catal->dealloc = 0; + xmlFreeCatalogEntry(catal); +} + +/** + * xmlCreateNewCatalog: + * @type: type of catalog + * @prefer: the PUBLIC vs. SYSTEM current preference value + * + * create a new Catalog, this type is shared both by XML and + * SGML catalogs, but the acceptable types values differs. 
+ * + * Returns the xmlCatalogPtr or NULL in case of error + */ +static xmlCatalogPtr +xmlCreateNewCatalog(xmlCatalogType type, xmlCatalogPrefer prefer) { + xmlCatalogPtr ret; + + ret = (xmlCatalogPtr) xmlMalloc(sizeof(xmlCatalog)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", sizeof(xmlCatalog)); + return(NULL); + } + memset(ret, 0, sizeof(xmlCatalog)); + ret->type = type; + ret->catalNr = 0; + ret->catalMax = XML_MAX_SGML_CATA_DEPTH; + ret->prefer = prefer; + if (ret->type == XML_SGML_CATALOG_TYPE) + ret->sgml = xmlHashCreate(10); + return(ret); +} + +/** + * xmlFreeCatalog: + * @catal: a Catalog entry + * + * Free the memory allocated to a Catalog + */ +void +xmlFreeCatalog(xmlCatalogPtr catal) { + if (catal == NULL) + return; + if (catal->xml != NULL) + xmlFreeCatalogEntryList(catal->xml); + if (catal->sgml != NULL) + xmlHashFree(catal->sgml, + (xmlHashDeallocator) xmlFreeCatalogEntry); + xmlFree(catal); +} + +/************************************************************************ + * * + * Serializing Catalogs * + * * + ************************************************************************/ + +/** + * xmlCatalogDumpEntry: + * @entry: the + * @out: the file. 
+ * + * Serialize an SGML Catalog entry + */ +static void +xmlCatalogDumpEntry(xmlCatalogEntryPtr entry, FILE *out) { + if ((entry == NULL) || (out == NULL)) + return; + switch (entry->type) { + case SGML_CATA_ENTITY: + fprintf(out, "ENTITY "); break; + case SGML_CATA_PENTITY: + fprintf(out, "ENTITY %%"); break; + case SGML_CATA_DOCTYPE: + fprintf(out, "DOCTYPE "); break; + case SGML_CATA_LINKTYPE: + fprintf(out, "LINKTYPE "); break; + case SGML_CATA_NOTATION: + fprintf(out, "NOTATION "); break; + case SGML_CATA_PUBLIC: + fprintf(out, "PUBLIC "); break; + case SGML_CATA_SYSTEM: + fprintf(out, "SYSTEM "); break; + case SGML_CATA_DELEGATE: + fprintf(out, "DELEGATE "); break; + case SGML_CATA_BASE: + fprintf(out, "BASE "); break; + case SGML_CATA_CATALOG: + fprintf(out, "CATALOG "); break; + case SGML_CATA_DOCUMENT: + fprintf(out, "DOCUMENT "); break; + case SGML_CATA_SGMLDECL: + fprintf(out, "SGMLDECL "); break; + default: + return; + } + switch (entry->type) { + case SGML_CATA_ENTITY: + case SGML_CATA_PENTITY: + case SGML_CATA_DOCTYPE: + case SGML_CATA_LINKTYPE: + case SGML_CATA_NOTATION: + fprintf(out, "%s", entry->name); break; + case SGML_CATA_PUBLIC: + case SGML_CATA_SYSTEM: + case SGML_CATA_SGMLDECL: + case SGML_CATA_DOCUMENT: + case SGML_CATA_CATALOG: + case SGML_CATA_BASE: + case SGML_CATA_DELEGATE: + fprintf(out, "\"%s\"", entry->name); break; + default: + break; + } + switch (entry->type) { + case SGML_CATA_ENTITY: + case SGML_CATA_PENTITY: + case SGML_CATA_DOCTYPE: + case SGML_CATA_LINKTYPE: + case SGML_CATA_NOTATION: + case SGML_CATA_PUBLIC: + case SGML_CATA_SYSTEM: + case SGML_CATA_DELEGATE: + fprintf(out, " \"%s\"", entry->value); break; + default: + break; + } + fprintf(out, "\n"); +} + +static int +xmlDumpXMLCatalog(FILE *out, xmlCatalogEntryPtr catal) { + int ret; + xmlDocPtr doc; + xmlNsPtr ns; + xmlDtdPtr dtd; + xmlNodePtr node, catalog; + xmlOutputBufferPtr buf; + xmlCatalogEntryPtr cur; + + /* + * Rebuild a catalog + */ + doc = xmlNewDoc(NULL); + 
if (doc == NULL) + return(-1); + dtd = xmlNewDtd(doc, BAD_CAST "catalog", + BAD_CAST "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN", +BAD_CAST "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd"); + + xmlAddChild((xmlNodePtr) doc, (xmlNodePtr) dtd); + + ns = xmlNewNs(NULL, XML_CATALOGS_NAMESPACE, NULL); + if (ns == NULL) { + xmlFreeDoc(doc); + return(-1); + } + catalog = xmlNewDocNode(doc, ns, BAD_CAST "catalog", NULL); + if (catalog == NULL) { + xmlFreeNs(ns); + xmlFreeDoc(doc); + return(-1); + } + catalog->nsDef = ns; + xmlAddChild((xmlNodePtr) doc, catalog); + + /* + * add all the catalog entries + */ + cur = catal; + while (cur != NULL) { + switch (cur->type) { + case XML_CATA_REMOVED: + break; + case XML_CATA_BROKEN_CATALOG: + case XML_CATA_CATALOG: + if (cur == catal) { + cur = cur->children; + continue; + } + break; + case XML_CATA_NEXT_CATALOG: + node = xmlNewDocNode(doc, ns, BAD_CAST "nextCatalog", NULL); + xmlSetProp(node, BAD_CAST "catalog", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_NONE: + break; + case XML_CATA_PUBLIC: + node = xmlNewDocNode(doc, ns, BAD_CAST "public", NULL); + xmlSetProp(node, BAD_CAST "publicId", cur->name); + xmlSetProp(node, BAD_CAST "uri", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_SYSTEM: + node = xmlNewDocNode(doc, ns, BAD_CAST "system", NULL); + xmlSetProp(node, BAD_CAST "systemId", cur->name); + xmlSetProp(node, BAD_CAST "uri", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_REWRITE_SYSTEM: + node = xmlNewDocNode(doc, ns, BAD_CAST "rewriteSystem", NULL); + xmlSetProp(node, BAD_CAST "systemIdStartString", cur->name); + xmlSetProp(node, BAD_CAST "rewritePrefix", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_DELEGATE_PUBLIC: + node = xmlNewDocNode(doc, ns, BAD_CAST "delegatePublic", NULL); + xmlSetProp(node, BAD_CAST "publicIdStartString", cur->name); + xmlSetProp(node, BAD_CAST "catalog", cur->value); + 
xmlAddChild(catalog, node); + break; + case XML_CATA_DELEGATE_SYSTEM: + node = xmlNewDocNode(doc, ns, BAD_CAST "delegateSystem", NULL); + xmlSetProp(node, BAD_CAST "systemIdStartString", cur->name); + xmlSetProp(node, BAD_CAST "catalog", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_URI: + node = xmlNewDocNode(doc, ns, BAD_CAST "uri", NULL); + xmlSetProp(node, BAD_CAST "name", cur->name); + xmlSetProp(node, BAD_CAST "uri", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_REWRITE_URI: + node = xmlNewDocNode(doc, ns, BAD_CAST "rewriteURI", NULL); + xmlSetProp(node, BAD_CAST "uriStartString", cur->name); + xmlSetProp(node, BAD_CAST "rewritePrefix", cur->value); + xmlAddChild(catalog, node); + break; + case XML_CATA_DELEGATE_URI: + node = xmlNewDocNode(doc, ns, BAD_CAST "delegateURI", NULL); + xmlSetProp(node, BAD_CAST "uriStartString", cur->name); + xmlSetProp(node, BAD_CAST "catalog", cur->value); + xmlAddChild(catalog, node); + break; + case SGML_CATA_SYSTEM: + case SGML_CATA_PUBLIC: + case SGML_CATA_ENTITY: + case SGML_CATA_PENTITY: + case SGML_CATA_DOCTYPE: + case SGML_CATA_LINKTYPE: + case SGML_CATA_NOTATION: + case SGML_CATA_DELEGATE: + case SGML_CATA_BASE: + case SGML_CATA_CATALOG: + case SGML_CATA_DOCUMENT: + case SGML_CATA_SGMLDECL: + break; + } + cur = cur->next; + } + + /* + * reserialize it + */ + buf = xmlOutputBufferCreateFile(out, NULL); + if (buf == NULL) { + xmlFreeDoc(doc); + return(-1); + } + ret = xmlSaveFormatFileTo(buf, doc, NULL, 1); + + /* + * Free it + */ + xmlFreeDoc(doc); + + return(ret); +} + +/************************************************************************ + * * + * Converting SGML Catalogs to XML * + * * + ************************************************************************/ + +/** + * xmlCatalogConvertEntry: + * @entry: the entry + * @catal: pointer to the catalog being converted + * + * Convert one entry from the catalog + */ +static void +xmlCatalogConvertEntry(xmlCatalogEntryPtr 
entry, xmlCatalogPtr catal) { + if ((entry == NULL) || (catal == NULL) || (catal->sgml == NULL) || + (catal->xml == NULL)) + return; + switch (entry->type) { + case SGML_CATA_ENTITY: + entry->type = XML_CATA_PUBLIC; + break; + case SGML_CATA_PENTITY: + entry->type = XML_CATA_PUBLIC; + break; + case SGML_CATA_DOCTYPE: + entry->type = XML_CATA_PUBLIC; + break; + case SGML_CATA_LINKTYPE: + entry->type = XML_CATA_PUBLIC; + break; + case SGML_CATA_NOTATION: + entry->type = XML_CATA_PUBLIC; + break; + case SGML_CATA_PUBLIC: + entry->type = XML_CATA_PUBLIC; + break; + case SGML_CATA_SYSTEM: + entry->type = XML_CATA_SYSTEM; + break; + case SGML_CATA_DELEGATE: + entry->type = XML_CATA_DELEGATE_PUBLIC; + break; + case SGML_CATA_CATALOG: + entry->type = XML_CATA_CATALOG; + break; + default: + xmlHashRemoveEntry(catal->sgml, entry->name, + (xmlHashDeallocator) xmlFreeCatalogEntry); + return; + } + /* + * Conversion successful, remove from the SGML catalog + * and add it to the default XML one + */ + xmlHashRemoveEntry(catal->sgml, entry->name, NULL); + entry->parent = catal->xml; + entry->next = NULL; + if (catal->xml->children == NULL) + catal->xml->children = entry; + else { + xmlCatalogEntryPtr prev; + + prev = catal->xml->children; + while (prev->next != NULL) + prev = prev->next; + prev->next = entry; + } +} + +/** + * xmlConvertSGMLCatalog: + * @catal: the catalog + * + * Convert all the SGML catalog entries as XML ones + * + * Returns the number of entries converted if successful, -1 otherwise + */ +int +xmlConvertSGMLCatalog(xmlCatalogPtr catal) { + + if ((catal == NULL) || (catal->type != XML_SGML_CATALOG_TYPE)) + return(-1); + + if (xmlDebugCatalogs) { + xmlGenericError(xmlGenericErrorContext, + "Converting SGML catalog to XML\n"); + } + xmlHashScan(catal->sgml, + (xmlHashScanner) xmlCatalogConvertEntry, + &catal); + return(0); +} + +/************************************************************************ + * * + * Helper function * + * * + 
************************************************************************/ + +/** + * xmlCatalogUnWrapURN: + * @urn: an "urn:publicid:" to unwrap + * + * Expand the URN into the equivalent Public Identifier + * + * Returns the new identifier or NULL, the string must be deallocated + * by the caller. + */ +static xmlChar * +xmlCatalogUnWrapURN(const xmlChar *urn) { + xmlChar result[2000]; + unsigned int i = 0; + + if (xmlStrncmp(urn, BAD_CAST XML_URN_PUBID, sizeof(XML_URN_PUBID) - 1)) + return(NULL); + urn += sizeof(XML_URN_PUBID) - 1; + + while (*urn != 0) { + if (i > sizeof(result) - 3) + break; + if (*urn == '+') { + result[i++] = ' '; + urn++; + } else if (*urn == ':') { + result[i++] = '/'; + result[i++] = '/'; + urn++; + } else if (*urn == ';') { + result[i++] = ':'; + result[i++] = ':'; + urn++; + } else if (*urn == '%') { + if ((urn[1] == '2') && (urn[1] == 'B')) + result[i++] = '+'; + else if ((urn[1] == '3') && (urn[1] == 'A')) + result[i++] = ':'; + else if ((urn[1] == '2') && (urn[1] == 'F')) + result[i++] = '/'; + else if ((urn[1] == '3') && (urn[1] == 'B')) + result[i++] = ';'; + else if ((urn[1] == '2') && (urn[1] == '7')) + result[i++] = '\''; + else if ((urn[1] == '3') && (urn[1] == 'F')) + result[i++] = '?'; + else if ((urn[1] == '2') && (urn[1] == '3')) + result[i++] = '#'; + else if ((urn[1] == '2') && (urn[1] == '5')) + result[i++] = '%'; + else { + result[i++] = *urn; + urn++; + continue; + } + urn += 3; + } else { + result[i++] = *urn; + urn++; + } + } + result[i] = 0; + + return(xmlStrdup(result)); +} + +/** + * xmlParseCatalogFile: + * @filename: the filename + * + * parse an XML file and build a tree. It's like xmlParseFile() + * except it bypass all catalog lookups. 
+ * + * Returns the resulting document tree or NULL in case of error + */ + +xmlDocPtr +xmlParseCatalogFile(const char *filename) { + xmlDocPtr ret; + xmlParserCtxtPtr ctxt; + char *directory = NULL; + xmlParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + if (xmlDefaultSAXHandler.error != NULL) { + xmlDefaultSAXHandler.error(NULL, "out of memory\n"); + } + return(NULL); + } + + buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); + if (buf == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + inputStream = xmlNewInputStream(ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + inputStream->filename = xmlMemStrdup(filename); + inputStream->buf = buf; + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = + &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; + + inputPush(ctxt, inputStream); + if ((ctxt->directory == NULL) && (directory == NULL)) + directory = xmlParserGetDirectory(filename); + if ((ctxt->directory == NULL) && (directory != NULL)) + ctxt->directory = directory; + ctxt->valid = 0; + ctxt->validate = 0; + ctxt->loadsubset = 0; + ctxt->pedantic = 0; + + xmlParseDocument(ctxt); + + if (ctxt->wellFormed) + ret = ctxt->myDoc; + else { + ret = NULL; + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlLoadFileContent: + * @filename: a file path + * + * Load a file content into memory. 
+ * + * Returns a pointer to the 0 terminated string or NULL in case of error + */ +static xmlChar * +xmlLoadFileContent(const char *filename) +{ +#ifdef HAVE_STAT + int fd; +#else + FILE *fd; +#endif + int len; + long size; + +#ifdef HAVE_STAT + struct stat info; +#endif + xmlChar *content; + + if (filename == NULL) + return (NULL); + +#ifdef HAVE_STAT + if (stat(filename, &info) < 0) + return (NULL); +#endif + +#ifdef HAVE_STAT + if ((fd = open(filename, O_RDONLY)) < 0) +#else + if ((fd = fopen(filename, "rb")) == NULL) +#endif + { + return (NULL); + } +#ifdef HAVE_STAT + size = info.st_size; +#else + if (fseek(fd, 0, SEEK_END) || (size = ftell(fd)) == EOF || fseek(fd, 0, SEEK_SET)) { /* File operations denied? ok, just close and return failure */ + fclose(fd); + return (NULL); + } +#endif + content = xmlMalloc(size + 10); + if (content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size + 10); + return (NULL); + } +#ifdef HAVE_STAT + len = read(fd, content, size); +#else + len = fread(content, 1, size, fd); +#endif + if (len < 0) { + xmlFree(content); + return (NULL); + } +#ifdef HAVE_STAT + close(fd); +#else + fclose(fd); +#endif + content[len] = 0; + + return(content); +} + +/************************************************************************ + * * + * The XML Catalog parser * + * * + ************************************************************************/ + +static xmlCatalogEntryPtr +xmlParseXMLCatalogFile(xmlCatalogPrefer prefer, const xmlChar *filename); +static void +xmlParseXMLCatalogNodeList(xmlNodePtr cur, xmlCatalogPrefer prefer, + xmlCatalogEntryPtr parent); +static xmlChar * +xmlCatalogListXMLResolve(xmlCatalogEntryPtr catal, const xmlChar *pubID, + const xmlChar *sysID); +static xmlChar * +xmlCatalogListXMLResolveURI(xmlCatalogEntryPtr catal, const xmlChar *URI); + + +/** + * xmlGetXMLCatalogEntryType: + * @name: the name + * + * lookup the internal type associated to an XML catalog entry name + * + * 
Returns the type associate with that name
 */
static xmlCatalogEntryType
xmlGetXMLCatalogEntryType(const xmlChar *name) {
    /* element names of an XML catalog and the entry type of each */
    static const struct {
        const char *element;
        xmlCatalogEntryType kind;
    } known[] = {
        { "system",         XML_CATA_SYSTEM },
        { "public",         XML_CATA_PUBLIC },
        { "rewriteSystem",  XML_CATA_REWRITE_SYSTEM },
        { "delegatePublic", XML_CATA_DELEGATE_PUBLIC },
        { "delegateSystem", XML_CATA_DELEGATE_SYSTEM },
        { "uri",            XML_CATA_URI },
        { "rewriteURI",     XML_CATA_REWRITE_URI },
        { "delegateURI",    XML_CATA_DELEGATE_URI },
        { "nextCatalog",    XML_CATA_NEXT_CATALOG },
        { "catalog",        XML_CATA_CATALOG }
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
        if (xmlStrEqual(name, (const xmlChar *) known[idx].element))
            return(known[idx].kind);
    }
    /* anything else is not a catalog entry element */
    return(XML_CATA_NONE);
}

/**
 * xmlParseXMLCatalogOneNode:
 * @cur:  the XML node
 * @type:  the type of Catalog entry
 * @name:  the name of the node
 * @attrName:  the attribute holding the value
 * @uriAttrName:  the attribute holding the URI-Reference
 * @prefer:  the PUBLIC vs. SYSTEM current preference value
 *
 * Finishes the examination of an XML tree node of a catalog and build
 * a Catalog entry from it.
 *
 * Returns the new Catalog entry node or NULL in case of error.
+ */ +static xmlCatalogEntryPtr +xmlParseXMLCatalogOneNode(xmlNodePtr cur, xmlCatalogEntryType type, + const xmlChar *name, const xmlChar *attrName, + const xmlChar *uriAttrName, xmlCatalogPrefer prefer) { + int ok = 1; + xmlChar *uriValue; + xmlChar *nameValue = NULL; + xmlChar *base = NULL; + xmlChar *URL = NULL; + xmlCatalogEntryPtr ret = NULL; + + if (attrName != NULL) { + nameValue = xmlGetProp(cur, attrName); + if (nameValue == NULL) { + xmlGenericError(xmlGenericErrorContext, + "%s entry lacks '%s'\n", name, attrName); + ok = 0; + } + } + uriValue = xmlGetProp(cur, uriAttrName); + if (uriValue == NULL) { + xmlGenericError(xmlGenericErrorContext, + "%s entry lacks '%s'\n", name, uriAttrName); + ok = 0; + } + if (!ok) { + if (nameValue != NULL) + xmlFree(nameValue); + if (uriValue != NULL) + xmlFree(uriValue); + return(NULL); + } + + base = xmlNodeGetBase(cur->doc, cur); + URL = xmlBuildURI(uriValue, base); + if (URL != NULL) { + if (xmlDebugCatalogs > 1) { + if (nameValue != NULL) + xmlGenericError(xmlGenericErrorContext, + "Found %s: '%s' '%s'\n", name, nameValue, URL); + else + xmlGenericError(xmlGenericErrorContext, + "Found %s: '%s'\n", name, URL); + } + ret = xmlNewCatalogEntry(type, nameValue, uriValue, URL, prefer); + } else { + xmlGenericError(xmlGenericErrorContext, + "%s entry '%s' broken ?: %s\n", name, uriAttrName, uriValue); + } + if (nameValue != NULL) + xmlFree(nameValue); + if (uriValue != NULL) + xmlFree(uriValue); + if (base != NULL) + xmlFree(base); + if (URL != NULL) + xmlFree(URL); + return(ret); +} + +/** + * xmlParseXMLCatalogNode: + * @cur: the XML node + * @prefer: the PUBLIC vs. SYSTEM current preference value + * @parent: the parent Catalog entry + * + * Examines an XML tree node of a catalog and build + * a Catalog entry from it adding it to its parent. The examination can + * be recursive. 
+ */ +static void +xmlParseXMLCatalogNode(xmlNodePtr cur, xmlCatalogPrefer prefer, + xmlCatalogEntryPtr parent) +{ + xmlChar *uri = NULL; + xmlChar *URL = NULL; + xmlChar *base = NULL; + xmlCatalogEntryPtr entry = NULL; + + if (cur == NULL) + return; + if (xmlStrEqual(cur->name, BAD_CAST "group")) { + xmlChar *prop; + + prop = xmlGetProp(cur, BAD_CAST "prefer"); + if (prop != NULL) { + if (xmlStrEqual(prop, BAD_CAST "system")) { + prefer = XML_CATA_PREFER_SYSTEM; + } else if (xmlStrEqual(prop, BAD_CAST "public")) { + prefer = XML_CATA_PREFER_PUBLIC; + } else { + xmlGenericError(xmlGenericErrorContext, + "Invalid value for prefer: '%s'\n", prop); + } + xmlFree(prop); + } + /* + * Recurse to propagate prefer to the subtree + * (xml:base handling is automated) + */ + xmlParseXMLCatalogNodeList(cur->children, prefer, parent); + } else if (xmlStrEqual(cur->name, BAD_CAST "public")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_PUBLIC, + BAD_CAST "public", BAD_CAST "publicId", BAD_CAST "uri", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "system")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_SYSTEM, + BAD_CAST "system", BAD_CAST "systemId", BAD_CAST "uri", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "rewriteSystem")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_REWRITE_SYSTEM, + BAD_CAST "rewriteSystem", BAD_CAST "systemIdStartString", + BAD_CAST "rewritePrefix", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "delegatePublic")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_DELEGATE_PUBLIC, + BAD_CAST "delegatePublic", BAD_CAST "publicIdStartString", + BAD_CAST "catalog", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "delegateSystem")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_DELEGATE_SYSTEM, + BAD_CAST "delegateSystem", BAD_CAST "systemIdStartString", + BAD_CAST "catalog", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "uri")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_URI, + BAD_CAST 
"uri", BAD_CAST "name", + BAD_CAST "uri", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "rewriteURI")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_REWRITE_URI, + BAD_CAST "rewriteURI", BAD_CAST "uriStartString", + BAD_CAST "rewritePrefix", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "delegateURI")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_DELEGATE_URI, + BAD_CAST "delegateURI", BAD_CAST "uriStartString", + BAD_CAST "catalog", prefer); + } else if (xmlStrEqual(cur->name, BAD_CAST "nextCatalog")) { + entry = xmlParseXMLCatalogOneNode(cur, XML_CATA_NEXT_CATALOG, + BAD_CAST "nextCatalog", NULL, + BAD_CAST "catalog", prefer); + } + if ((entry != NULL) && (parent != NULL)) { + entry->parent = parent; + if (parent->children == NULL) + parent->children = entry; + else { + xmlCatalogEntryPtr prev; + + prev = parent->children; + while (prev->next != NULL) + prev = prev->next; + prev->next = entry; + } + } + if (base != NULL) + xmlFree(base); + if (uri != NULL) + xmlFree(uri); + if (URL != NULL) + xmlFree(URL); +} + +/** + * xmlParseXMLCatalogNodeList: + * @cur: the XML node list of siblings + * @prefer: the PUBLIC vs. SYSTEM current preference value + * @parent: the parent Catalog entry + * + * Examines a list of XML sibling nodes of a catalog and build + * a list of Catalog entry from it adding it to the parent. + * The examination will recurse to examine node subtrees. + */ +static void +xmlParseXMLCatalogNodeList(xmlNodePtr cur, xmlCatalogPrefer prefer, + xmlCatalogEntryPtr parent) { + while (cur != NULL) { + if ((cur->ns != NULL) && (cur->ns->href != NULL) && + (xmlStrEqual(cur->ns->href, XML_CATALOGS_NAMESPACE))) { + xmlParseXMLCatalogNode(cur, prefer, parent); + } + cur = cur->next; + } + /* TODO: sort the list according to REWRITE lengths and prefer value */ +} + +/** + * xmlParseXMLCatalogFile: + * @prefer: the PUBLIC vs. 
SYSTEM current preference value + * @filename: the filename for the catalog + * + * Parses the catalog file to extract the XML tree and then analyze the + * tree to build a list of Catalog entries corresponding to this catalog + * + * Returns the resulting Catalog entries list + */ +static xmlCatalogEntryPtr +xmlParseXMLCatalogFile(xmlCatalogPrefer prefer, const xmlChar *filename) { + xmlDocPtr doc; + xmlNodePtr cur; + xmlChar *prop; + xmlCatalogEntryPtr parent = NULL; + + if (filename == NULL) + return(NULL); + + doc = xmlParseCatalogFile((const char *) filename); + if (doc == NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Failed to parse catalog %s\n", filename); + return(NULL); + } + + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "%d Parsing catalog %s\n", xmlGetThreadId(), filename); + + cur = xmlDocGetRootElement(doc); + if ((cur != NULL) && (xmlStrEqual(cur->name, BAD_CAST "catalog")) && + (cur->ns != NULL) && (cur->ns->href != NULL) && + (xmlStrEqual(cur->ns->href, XML_CATALOGS_NAMESPACE))) { + + parent = xmlNewCatalogEntry(XML_CATA_CATALOG, NULL, + (const xmlChar *)filename, NULL, prefer); + if (parent == NULL) { + xmlFreeDoc(doc); + return(NULL); + } + + prop = xmlGetProp(cur, BAD_CAST "prefer"); + if (prop != NULL) { + if (xmlStrEqual(prop, BAD_CAST "system")) { + prefer = XML_CATA_PREFER_SYSTEM; + } else if (xmlStrEqual(prop, BAD_CAST "public")) { + prefer = XML_CATA_PREFER_PUBLIC; + } else { + xmlGenericError(xmlGenericErrorContext, + "Invalid value for prefer: '%s'\n", + prop); + } + xmlFree(prop); + } + cur = cur->children; + xmlParseXMLCatalogNodeList(cur, prefer, parent); + } else { + xmlGenericError(xmlGenericErrorContext, + "File %s is not an XML Catalog\n", filename); + xmlFreeDoc(doc); + return(NULL); + } + xmlFreeDoc(doc); + return(parent); +} + +/** + * xmlFetchXMLCatalogFile: + * @catal: an existing but incomplete catalog entry + * + * Fetch and parse the subcatalog referenced by an entry + * 
+ * Returns 0 in case of success, -1 otherwise + */ +static int +xmlFetchXMLCatalogFile(xmlCatalogEntryPtr catal) { + xmlCatalogEntryPtr doc; + + if (catal == NULL) + return(-1); + if (catal->URL == NULL) + return(-1); + if (catal->children != NULL) + return(-1); + + /* + * lock the whole catalog for modification + */ + xmlRMutexLock(xmlCatalogMutex); + if (catal->children != NULL) { + /* Okay someone else did it in the meantime */ + xmlRMutexUnlock(xmlCatalogMutex); + return(0); + } + + if (xmlCatalogXMLFiles != NULL) { + doc = (xmlCatalogEntryPtr) + xmlHashLookup(xmlCatalogXMLFiles, catal->URL); + if (doc != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Found %s in file hash\n", catal->URL); + + if (catal->type == XML_CATA_CATALOG) + catal->children = doc->children; + else + catal->children = doc; + catal->dealloc = 0; + xmlRMutexUnlock(xmlCatalogMutex); + return(0); + } + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "%s not found in file hash\n", catal->URL); + } + + /* + * Fetch and parse. Note that xmlParseXMLCatalogFile does not + * use the existing catalog, there is no recursion allowed at + * that level. 
+ */ + doc = xmlParseXMLCatalogFile(catal->prefer, catal->URL); + if (doc == NULL) { + catal->type = XML_CATA_BROKEN_CATALOG; + xmlRMutexUnlock(xmlCatalogMutex); + return(-1); + } + + if (catal->type == XML_CATA_CATALOG) + catal->children = doc->children; + else + catal->children = doc; + + doc->dealloc = 1; + + if (xmlCatalogXMLFiles == NULL) + xmlCatalogXMLFiles = xmlHashCreate(10); + if (xmlCatalogXMLFiles != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "%s added to file hash\n", catal->URL); + xmlHashAddEntry(xmlCatalogXMLFiles, catal->URL, doc); + } + xmlRMutexUnlock(xmlCatalogMutex); + return(0); +} + +/************************************************************************ + * * + * XML Catalog handling * + * * + ************************************************************************/ + +/** + * xmlAddXMLCatalog: + * @catal: top of an XML catalog + * @type: the type of record to add to the catalog + * @orig: the system, public or prefix to match (or NULL) + * @replace: the replacement value for the match + * + * Add an entry in the XML catalog, it may overwrite existing but + * different entries. 
+ * + * Returns 0 if successful, -1 otherwise + */ +static int +xmlAddXMLCatalog(xmlCatalogEntryPtr catal, const xmlChar *type, + const xmlChar *orig, const xmlChar *replace) { + xmlCatalogEntryPtr cur; + xmlCatalogEntryType typ; + int doregister = 0; + + if ((catal == NULL) || + ((catal->type != XML_CATA_CATALOG) && + (catal->type != XML_CATA_BROKEN_CATALOG))) + return(-1); + if (catal->children == NULL) { + xmlFetchXMLCatalogFile(catal); + } + if (catal->children == NULL) + doregister = 1; + + typ = xmlGetXMLCatalogEntryType(type); + if (typ == XML_CATA_NONE) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Failed to add unknown element %s to catalog\n", type); + return(-1); + } + + cur = catal->children; + /* + * Might be a simple "update in place" + */ + if (cur != NULL) { + while (cur != NULL) { + if ((orig != NULL) && (cur->type == typ) && + (xmlStrEqual(orig, cur->name))) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Updating element %s to catalog\n", type); + if (cur->value != NULL) + xmlFree(cur->value); + if (cur->URL != NULL) + xmlFree(cur->URL); + cur->value = xmlStrdup(replace); + cur->URL = xmlStrdup(replace); + return(0); + } + if (cur->next == NULL) + break; + cur = cur->next; + } + } + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Adding element %s to catalog\n", type); + if (cur == NULL) + catal->children = xmlNewCatalogEntry(typ, orig, replace, + NULL, catal->prefer); + else + cur->next = xmlNewCatalogEntry(typ, orig, replace, + NULL, catal->prefer); + if (doregister) { + cur = xmlHashLookup(xmlCatalogXMLFiles, catal->URL); + if (cur != NULL) + cur->children = catal->children; + } + + return(0); +} + +/** + * xmlDelXMLCatalog: + * @catal: top of an XML catalog + * @value: the value to remove from the catalog + * + * Remove entries in the XML catalog where the value or the URI + * is equal to @value + * + * Returns the number of entries removed if successful, -1 otherwise + */ 
+static int +xmlDelXMLCatalog(xmlCatalogEntryPtr catal, const xmlChar *value) { + xmlCatalogEntryPtr cur; + int ret = 0; + + if ((catal == NULL) || + ((catal->type != XML_CATA_CATALOG) && + (catal->type != XML_CATA_BROKEN_CATALOG))) + return(-1); + if (value == NULL) + return(-1); + if (catal->children == NULL) { + xmlFetchXMLCatalogFile(catal); + } + + /* + * Scan the children + */ + cur = catal->children; + while (cur != NULL) { + if (((cur->name != NULL) && (xmlStrEqual(value, cur->name))) || + (xmlStrEqual(value, cur->value))) { + if (xmlDebugCatalogs) { + if (cur->name != NULL) + xmlGenericError(xmlGenericErrorContext, + "Removing element %s from catalog\n", cur->name); + else + xmlGenericError(xmlGenericErrorContext, + "Removing element %s from catalog\n", cur->value); + } + cur->type = XML_CATA_REMOVED; + } + cur = cur->next; + } + return(ret); +} + +/** + * xmlCatalogXMLResolve: + * @catal: a catalog list + * @pubId: the public ID string + * @sysId: the system ID string + * + * Do a complete resolution lookup of an External Identifier for a + * list of catalog entries. + * + * Implements (or tries to) 7.1. External Identifier Resolution + * from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html + * + * Returns the URI of the resource or NULL if not found + */ +static xmlChar * +xmlCatalogXMLResolve(xmlCatalogEntryPtr catal, const xmlChar *pubID, + const xmlChar *sysID) { + xmlChar *ret = NULL; + xmlCatalogEntryPtr cur; + int haveDelegate = 0; + int haveNext = 0; + + /* + * First tries steps 2/ 3/ 4/ if a system ID is provided. 
+ */ + if (sysID != NULL) { + xmlCatalogEntryPtr rewrite = NULL; + int lenrewrite = 0, len; + cur = catal; + haveDelegate = 0; + while (cur != NULL) { + switch (cur->type) { + case XML_CATA_SYSTEM: + if (xmlStrEqual(sysID, cur->name)) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Found system match %s\n", cur->name); + return(xmlStrdup(cur->URL)); + } + break; + case XML_CATA_REWRITE_SYSTEM: + len = xmlStrlen(cur->name); + if ((len > lenrewrite) && + (!xmlStrncmp(sysID, cur->name, len))) { + lenrewrite = len; + rewrite = cur; + } + break; + case XML_CATA_DELEGATE_SYSTEM: + if (!xmlStrncmp(sysID, cur->name, xmlStrlen(cur->name))) + haveDelegate++; + break; + case XML_CATA_NEXT_CATALOG: + haveNext++; + break; + default: + break; + } + cur = cur->next; + } + if (rewrite != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Using rewriting rule %s\n", rewrite->name); + ret = xmlStrdup(rewrite->URL); + if (ret != NULL) + ret = xmlStrcat(ret, &sysID[lenrewrite]); + return(ret); + } + if (haveDelegate) { + const xmlChar *delegates[MAX_DELEGATE]; + int nbList = 0, i; + + /* + * Assume the entries have been sorted by decreasing substring + * matches when the list was produced. 
+ */ + cur = catal; + while (cur != NULL) { + if ((cur->type == XML_CATA_DELEGATE_SYSTEM) && + (!xmlStrncmp(sysID, cur->name, xmlStrlen(cur->name)))) { + for (i = 0;i < nbList;i++) + if (xmlStrEqual(cur->URL, delegates[i])) + break; + if (i < nbList) { + cur = cur->next; + continue; + } + if (nbList < MAX_DELEGATE) + delegates[nbList++] = cur->URL; + + if (cur->children == NULL) { + xmlFetchXMLCatalogFile(cur); + } + if (cur->children != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Trying system delegate %s\n", cur->URL); + ret = xmlCatalogListXMLResolve( + cur->children, NULL, sysID); + if (ret != NULL) + return(ret); + } + } + cur = cur->next; + } + /* + * Apply the cut algorithm explained in 4/ + */ + return(XML_CATAL_BREAK); + } + } + /* + * Then tries 5/ 6/ if a public ID is provided + */ + if (pubID != NULL) { + cur = catal; + haveDelegate = 0; + while (cur != NULL) { + switch (cur->type) { + case XML_CATA_PUBLIC: + if (xmlStrEqual(pubID, cur->name)) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Found public match %s\n", cur->name); + return(xmlStrdup(cur->URL)); + } + break; + case XML_CATA_DELEGATE_PUBLIC: + if (!xmlStrncmp(pubID, cur->name, xmlStrlen(cur->name)) && + (cur->prefer == XML_CATA_PREFER_PUBLIC)) + haveDelegate++; + break; + case XML_CATA_NEXT_CATALOG: + if (sysID == NULL) + haveNext++; + break; + default: + break; + } + cur = cur->next; + } + if (haveDelegate) { + const xmlChar *delegates[MAX_DELEGATE]; + int nbList = 0, i; + + /* + * Assume the entries have been sorted by decreasing substring + * matches when the list was produced. 
+ */ + cur = catal; + while (cur != NULL) { + if ((cur->type == XML_CATA_DELEGATE_PUBLIC) && + (cur->prefer == XML_CATA_PREFER_PUBLIC) && + (!xmlStrncmp(pubID, cur->name, xmlStrlen(cur->name)))) { + + for (i = 0;i < nbList;i++) + if (xmlStrEqual(cur->URL, delegates[i])) + break; + if (i < nbList) { + cur = cur->next; + continue; + } + if (nbList < MAX_DELEGATE) + delegates[nbList++] = cur->URL; + + if (cur->children == NULL) { + xmlFetchXMLCatalogFile(cur); + } + if (cur->children != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Trying public delegate %s\n", cur->URL); + ret = xmlCatalogListXMLResolve( + cur->children, pubID, NULL); + if (ret != NULL) + return(ret); + } + } + cur = cur->next; + } + /* + * Apply the cut algorithm explained in 4/ + */ + return(XML_CATAL_BREAK); + } + } + if (haveNext) { + cur = catal; + while (cur != NULL) { + if (cur->type == XML_CATA_NEXT_CATALOG) { + if (cur->children == NULL) { + xmlFetchXMLCatalogFile(cur); + } + if (cur->children != NULL) { + ret = xmlCatalogListXMLResolve(cur->children, pubID, sysID); + if (ret != NULL) + return(ret); + } + } + cur = cur->next; + } + } + + return(NULL); +} + +/** + * xmlCatalogXMLResolveURI: + * @catal: a catalog list + * @URI: the URI + * @sysId: the system ID string + * + * Do a complete resolution lookup of an External Identifier for a + * list of catalog entries. + * + * Implements (or tries to) 7.2.2. URI Resolution + * from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html + * + * Returns the URI of the resource or NULL if not found + */ +static xmlChar * +xmlCatalogXMLResolveURI(xmlCatalogEntryPtr catal, const xmlChar *URI) { + xmlChar *ret = NULL; + xmlCatalogEntryPtr cur; + int haveDelegate = 0; + int haveNext = 0; + xmlCatalogEntryPtr rewrite = NULL; + int lenrewrite = 0, len; + + if (catal == NULL) + return(NULL); + + if (URI == NULL) + return(NULL); + + /* + * First tries steps 2/ 3/ 4/ if a system ID is provided. 
+ */ + cur = catal; + haveDelegate = 0; + while (cur != NULL) { + switch (cur->type) { + case XML_CATA_URI: + if (xmlStrEqual(URI, cur->name)) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Found URI match %s\n", cur->name); + return(xmlStrdup(cur->URL)); + } + break; + case XML_CATA_REWRITE_URI: + len = xmlStrlen(cur->name); + if ((len > lenrewrite) && + (!xmlStrncmp(URI, cur->name, len))) { + lenrewrite = len; + rewrite = cur; + } + break; + case XML_CATA_DELEGATE_URI: + if (!xmlStrncmp(URI, cur->name, xmlStrlen(cur->name))) + haveDelegate++; + break; + case XML_CATA_NEXT_CATALOG: + haveNext++; + break; + default: + break; + } + cur = cur->next; + } + if (rewrite != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Using rewriting rule %s\n", rewrite->name); + ret = xmlStrdup(rewrite->URL); + if (ret != NULL) + ret = xmlStrcat(ret, &URI[lenrewrite]); + return(ret); + } + if (haveDelegate) { + const xmlChar *delegates[MAX_DELEGATE]; + int nbList = 0, i; + + /* + * Assume the entries have been sorted by decreasing substring + * matches when the list was produced. 
+ */ + cur = catal; + while (cur != NULL) { + if ((cur->type == XML_CATA_DELEGATE_SYSTEM) && + (!xmlStrncmp(URI, cur->name, xmlStrlen(cur->name)))) { + for (i = 0;i < nbList;i++) + if (xmlStrEqual(cur->URL, delegates[i])) + break; + if (i < nbList) { + cur = cur->next; + continue; + } + if (nbList < MAX_DELEGATE) + delegates[nbList++] = cur->URL; + + if (cur->children == NULL) { + xmlFetchXMLCatalogFile(cur); + } + if (cur->children != NULL) { + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Trying URI delegate %s\n", cur->URL); + ret = xmlCatalogListXMLResolveURI( + cur->children, URI); + if (ret != NULL) + return(ret); + } + } + cur = cur->next; + } + /* + * Apply the cut algorithm explained in 4/ + */ + return(XML_CATAL_BREAK); + } + if (haveNext) { + cur = catal; + while (cur != NULL) { + if (cur->type == XML_CATA_NEXT_CATALOG) { + if (cur->children == NULL) { + xmlFetchXMLCatalogFile(cur); + } + if (cur->children != NULL) { + ret = xmlCatalogListXMLResolveURI(cur->children, URI); + if (ret != NULL) + return(ret); + } + } + cur = cur->next; + } + } + + return(NULL); +} + +/** + * xmlCatalogListXMLResolve: + * @catal: a catalog list + * @pubId: the public ID string + * @sysId: the system ID string + * + * Do a complete resolution lookup of an External Identifier for a + * list of catalogs + * + * Implements (or tries to) 7.1. 
External Identifier Resolution + * from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html + * + * Returns the URI of the resource or NULL if not found + */ +static xmlChar * +xmlCatalogListXMLResolve(xmlCatalogEntryPtr catal, const xmlChar *pubID, + const xmlChar *sysID) { + xmlChar *ret = NULL; + xmlChar *urnID = NULL; + + if (catal == NULL) + return(NULL); + if ((pubID == NULL) && (sysID == NULL)) + return(NULL); + + if (!xmlStrncmp(pubID, BAD_CAST XML_URN_PUBID, sizeof(XML_URN_PUBID) - 1)) { + urnID = xmlCatalogUnWrapURN(pubID); + if (xmlDebugCatalogs) { + if (urnID == NULL) + xmlGenericError(xmlGenericErrorContext, + "Public URN ID %s expanded to NULL\n", pubID); + else + xmlGenericError(xmlGenericErrorContext, + "Public URN ID expanded to %s\n", urnID); + } + ret = xmlCatalogListXMLResolve(catal, urnID, sysID); + if (urnID != NULL) + xmlFree(urnID); + return(ret); + } + if (!xmlStrncmp(sysID, BAD_CAST XML_URN_PUBID, sizeof(XML_URN_PUBID) - 1)) { + urnID = xmlCatalogUnWrapURN(sysID); + if (xmlDebugCatalogs) { + if (urnID == NULL) + xmlGenericError(xmlGenericErrorContext, + "System URN ID %s expanded to NULL\n", sysID); + else + xmlGenericError(xmlGenericErrorContext, + "System URN ID expanded to %s\n", urnID); + } + if (pubID == NULL) + ret = xmlCatalogListXMLResolve(catal, urnID, NULL); + else if (xmlStrEqual(pubID, urnID)) + ret = xmlCatalogListXMLResolve(catal, pubID, NULL); + else { + ret = xmlCatalogListXMLResolve(catal, pubID, NULL); + } + if (urnID != NULL) + xmlFree(urnID); + return(ret); + } + while (catal != NULL) { + if (catal->type == XML_CATA_CATALOG) { + if (catal->children == NULL) { + xmlFetchXMLCatalogFile(catal); + } + if (catal->children != NULL) { + ret = xmlCatalogXMLResolve(catal->children, pubID, sysID); + if (ret != NULL) + return(ret); + } + } + catal = catal->next; + } + return(ret); +} + +/** + * xmlCatalogListXMLResolveURI: + * @catal: a catalog list + * @URI: the URI + * + * Do a complete resolution lookup of an URI 
for a list of catalogs + * + * Implements (or tries to) 7.2. URI Resolution + * from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html + * + * Returns the URI of the resource or NULL if not found + */ +static xmlChar * +xmlCatalogListXMLResolveURI(xmlCatalogEntryPtr catal, const xmlChar *URI) { + xmlChar *ret = NULL; + xmlChar *urnID = NULL; + + if (catal == NULL) + return(NULL); + if (URI == NULL) + return(NULL); + + if (!xmlStrncmp(URI, BAD_CAST XML_URN_PUBID, sizeof(XML_URN_PUBID) - 1)) { + urnID = xmlCatalogUnWrapURN(URI); + if (xmlDebugCatalogs) { + if (urnID == NULL) + xmlGenericError(xmlGenericErrorContext, + "URN ID %s expanded to NULL\n", URI); + else + xmlGenericError(xmlGenericErrorContext, + "URN ID expanded to %s\n", urnID); + } + ret = xmlCatalogListXMLResolve(catal, urnID, NULL); + if (urnID != NULL) + xmlFree(urnID); + return(ret); + } + while (catal != NULL) { + if (catal->type == XML_CATA_CATALOG) { + if (catal->children == NULL) { + xmlFetchXMLCatalogFile(catal); + } + if (catal->children != NULL) { + ret = xmlCatalogXMLResolveURI(catal->children, URI); + if (ret != NULL) + return(ret); + } + } + catal = catal->next; + } + return(ret); +} + +/************************************************************************ + * * + * The SGML Catalog parser * + * * + ************************************************************************/ + + +#define RAW *cur +#define NEXT cur++; +#define SKIP(x) cur += x; + +#define SKIP_BLANKS while (IS_BLANK(*cur)) NEXT; + +/** + * xmlParseSGMLCatalogComment: + * @cur: the current character + * + * Skip a comment in an SGML catalog + * + * Returns new current character + */ +static const xmlChar * +xmlParseSGMLCatalogComment(const xmlChar *cur) { + if ((cur[0] != '-') || (cur[1] != '-')) + return(cur); + SKIP(2); + while ((cur[0] != 0) && ((cur[0] != '-') || ((cur[1] != '-')))) + NEXT; + if (cur[0] == 0) { + return(NULL); + } + return(cur + 2); +} + +/** + * xmlParseSGMLCatalogPubid: + * @cur: the 
current character + * @id: the return location + * + * Parse an SGML catalog ID + * + * Returns new current character and store the value in @id + */ +static const xmlChar * +xmlParseSGMLCatalogPubid(const xmlChar *cur, xmlChar **id) { + xmlChar *buf = NULL; + int len = 0; + int size = 50; + xmlChar stop; + int count = 0; + + *id = NULL; + + if (RAW == '"') { + NEXT; + stop = '"'; + } else if (RAW == '\'') { + NEXT; + stop = '\''; + } else { + stop = ' '; + } + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + while (xmlIsPubidChar(*cur) || (*cur == '?')) { + if ((*cur == stop) && (stop != ' ')) + break; + if ((stop == ' ') && (IS_BLANK(*cur))) + break; + if (len + 1 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + buf[len++] = *cur; + count++; + NEXT; + } + buf[len] = 0; + if (stop == ' ') { + if (!IS_BLANK(*cur)) { + xmlFree(buf); + return(NULL); + } + } else { + if (*cur != stop) { + xmlFree(buf); + return(NULL); + } + NEXT; + } + *id = buf; + return(cur); +} + +/** + * xmlParseSGMLCatalogName: + * @cur: the current character + * @name: the return location + * + * Parse an SGML catalog name + * + * Returns new current character and store the value in @name + */ +static const xmlChar * +xmlParseSGMLCatalogName(const xmlChar *cur, xmlChar **name) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0; + int c; + + *name = NULL; + + /* + * Handler for more complex cases + */ + c = *cur; + if ((!IS_LETTER(c) && (c != '_') && (c != ':'))) { + return(NULL); + } + + while (((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':'))) { + buf[len++] = c; + cur++; + c = *cur; + if (len >= XML_MAX_NAMELEN) + return(NULL); + } + *name = xmlStrndup(buf, 
len); + return(cur); +} + +/** + * xmlGetSGMLCatalogEntryType: + * @name: the entry name + * + * Get the Catalog entry type for a given SGML Catalog name + * + * Returns Catalog entry type + */ +static xmlCatalogEntryType +xmlGetSGMLCatalogEntryType(const xmlChar *name) { + xmlCatalogEntryType type = XML_CATA_NONE; + if (xmlStrEqual(name, (const xmlChar *) "SYSTEM")) + type = SGML_CATA_SYSTEM; + else if (xmlStrEqual(name, (const xmlChar *) "PUBLIC")) + type = SGML_CATA_PUBLIC; + else if (xmlStrEqual(name, (const xmlChar *) "DELEGATE")) + type = SGML_CATA_DELEGATE; + else if (xmlStrEqual(name, (const xmlChar *) "ENTITY")) + type = SGML_CATA_ENTITY; + else if (xmlStrEqual(name, (const xmlChar *) "DOCTYPE")) + type = SGML_CATA_DOCTYPE; + else if (xmlStrEqual(name, (const xmlChar *) "LINKTYPE")) + type = SGML_CATA_LINKTYPE; + else if (xmlStrEqual(name, (const xmlChar *) "NOTATION")) + type = SGML_CATA_NOTATION; + else if (xmlStrEqual(name, (const xmlChar *) "SGMLDECL")) + type = SGML_CATA_SGMLDECL; + else if (xmlStrEqual(name, (const xmlChar *) "DOCUMENT")) + type = SGML_CATA_DOCUMENT; + else if (xmlStrEqual(name, (const xmlChar *) "CATALOG")) + type = SGML_CATA_CATALOG; + else if (xmlStrEqual(name, (const xmlChar *) "BASE")) + type = SGML_CATA_BASE; + else if (xmlStrEqual(name, (const xmlChar *) "DELEGATE")) + type = SGML_CATA_DELEGATE; + return(type); +} + +/** + * xmlParseSGMLCatalog: + * @catal: the SGML Catalog + * @value: the content of the SGML Catalog serialization + * @file: the filepath for the catalog + * @super: should this be handled as a Super Catalog in which case + * parsing is not recursive + * + * Parse an SGML catalog content and fill up the @catal hash table with + * the new entries found. + * + * Returns 0 in case of success, -1 in case of error. 
+ */ +static int +xmlParseSGMLCatalog(xmlCatalogPtr catal, const xmlChar *value, + const char *file, int super) { + const xmlChar *cur = value; + xmlChar *base = NULL; + int res; + + if ((cur == NULL) || (file == NULL)) + return(-1); + base = xmlStrdup((const xmlChar *) file); + + while ((cur != NULL) && (cur[0] != 0)) { + SKIP_BLANKS; + if (cur[0] == 0) + break; + if ((cur[0] == '-') && (cur[1] == '-')) { + cur = xmlParseSGMLCatalogComment(cur); + if (cur == NULL) { + /* error */ + break; + } + } else { + xmlChar *sysid = NULL; + xmlChar *name = NULL; + xmlCatalogEntryType type = XML_CATA_NONE; + + cur = xmlParseSGMLCatalogName(cur, &name); + if (name == NULL) { + /* error */ + break; + } + if (!IS_BLANK(*cur)) { + /* error */ + break; + } + SKIP_BLANKS; + if (xmlStrEqual(name, (const xmlChar *) "SYSTEM")) + type = SGML_CATA_SYSTEM; + else if (xmlStrEqual(name, (const xmlChar *) "PUBLIC")) + type = SGML_CATA_PUBLIC; + else if (xmlStrEqual(name, (const xmlChar *) "DELEGATE")) + type = SGML_CATA_DELEGATE; + else if (xmlStrEqual(name, (const xmlChar *) "ENTITY")) + type = SGML_CATA_ENTITY; + else if (xmlStrEqual(name, (const xmlChar *) "DOCTYPE")) + type = SGML_CATA_DOCTYPE; + else if (xmlStrEqual(name, (const xmlChar *) "LINKTYPE")) + type = SGML_CATA_LINKTYPE; + else if (xmlStrEqual(name, (const xmlChar *) "NOTATION")) + type = SGML_CATA_NOTATION; + else if (xmlStrEqual(name, (const xmlChar *) "SGMLDECL")) + type = SGML_CATA_SGMLDECL; + else if (xmlStrEqual(name, (const xmlChar *) "DOCUMENT")) + type = SGML_CATA_DOCUMENT; + else if (xmlStrEqual(name, (const xmlChar *) "CATALOG")) + type = SGML_CATA_CATALOG; + else if (xmlStrEqual(name, (const xmlChar *) "BASE")) + type = SGML_CATA_BASE; + else if (xmlStrEqual(name, (const xmlChar *) "DELEGATE")) + type = SGML_CATA_DELEGATE; + else if (xmlStrEqual(name, (const xmlChar *) "OVERRIDE")) { + xmlFree(name); + cur = xmlParseSGMLCatalogName(cur, &name); + if (name == NULL) { + /* error */ + break; + } + xmlFree(name); + 
continue; + } + xmlFree(name); + name = NULL; + + switch(type) { + case SGML_CATA_ENTITY: + if (*cur == '%') + type = SGML_CATA_PENTITY; + case SGML_CATA_PENTITY: + case SGML_CATA_DOCTYPE: + case SGML_CATA_LINKTYPE: + case SGML_CATA_NOTATION: + cur = xmlParseSGMLCatalogName(cur, &name); + if (cur == NULL) { + /* error */ + break; + } + if (!IS_BLANK(*cur)) { + /* error */ + break; + } + SKIP_BLANKS; + cur = xmlParseSGMLCatalogPubid(cur, &sysid); + if (cur == NULL) { + /* error */ + break; + } + break; + case SGML_CATA_PUBLIC: + case SGML_CATA_SYSTEM: + case SGML_CATA_DELEGATE: + cur = xmlParseSGMLCatalogPubid(cur, &name); + if (cur == NULL) { + /* error */ + break; + } + if (!IS_BLANK(*cur)) { + /* error */ + break; + } + SKIP_BLANKS; + cur = xmlParseSGMLCatalogPubid(cur, &sysid); + if (cur == NULL) { + /* error */ + break; + } + break; + case SGML_CATA_BASE: + case SGML_CATA_CATALOG: + case SGML_CATA_DOCUMENT: + case SGML_CATA_SGMLDECL: + cur = xmlParseSGMLCatalogPubid(cur, &sysid); + if (cur == NULL) { + /* error */ + break; + } + break; + default: + break; + } + if (cur == NULL) { + if (name != NULL) + xmlFree(name); + if (sysid != NULL) + xmlFree(sysid); + break; + } else if (type == SGML_CATA_BASE) { + if (base != NULL) + xmlFree(base); + base = xmlStrdup(sysid); + } else if ((type == SGML_CATA_PUBLIC) || + (type == SGML_CATA_SYSTEM)) { + xmlChar *filename; + + filename = xmlBuildURI(sysid, base); + if (filename != NULL) { + xmlCatalogEntryPtr entry; + + entry = xmlNewCatalogEntry(type, name, filename, + NULL, XML_CATA_PREFER_NONE); + res = xmlHashAddEntry(catal->sgml, name, entry); + if (res < 0) { + xmlFreeCatalogEntry(entry); + } + xmlFree(filename); + } + + } else if (type == SGML_CATA_CATALOG) { + if (super) { + xmlCatalogEntryPtr entry; + + entry = xmlNewCatalogEntry(type, sysid, NULL, NULL, + XML_CATA_PREFER_NONE); + res = xmlHashAddEntry(catal->sgml, sysid, entry); + if (res < 0) { + xmlFreeCatalogEntry(entry); + } + } else { + xmlChar *filename; + + 
filename = xmlBuildURI(sysid, base); + if (filename != NULL) { + xmlExpandCatalog(catal, (const char *)filename); + xmlFree(filename); + } + } + } + /* + * drop anything else we won't handle it + */ + if (name != NULL) + xmlFree(name); + if (sysid != NULL) + xmlFree(sysid); + } + } + if (base != NULL) + xmlFree(base); + if (cur == NULL) + return(-1); + return(0); +} + +/************************************************************************ + * * + * SGML Catalog handling * + * * + ************************************************************************/ + +/** + * xmlCatalogGetSGMLPublic: + * @catal: an SGML catalog hash + * @pubId: the public ID string + * + * Try to lookup the system ID associated to a public ID + * + * Returns the system ID if found or NULL otherwise. + */ +static const xmlChar * +xmlCatalogGetSGMLPublic(xmlHashTablePtr catal, const xmlChar *pubID) { + xmlCatalogEntryPtr entry; + + if (catal == NULL) + return(NULL); + + entry = (xmlCatalogEntryPtr) xmlHashLookup(catal, pubID); + if (entry == NULL) + return(NULL); + if (entry->type == SGML_CATA_PUBLIC) + return(entry->URL); + return(NULL); +} + +/** + * xmlCatalogGetSGMLSystem: + * @catal: an SGML catalog hash + * @sysId: the public ID string + * + * Try to lookup the catalog local reference for a system ID + * + * Returns the system ID if found or NULL otherwise. 
+ */ +static const xmlChar * +xmlCatalogGetSGMLSystem(xmlHashTablePtr catal, const xmlChar *sysID) { + xmlCatalogEntryPtr entry; + + if (catal == NULL) + return(NULL); + + entry = (xmlCatalogEntryPtr) xmlHashLookup(catal, sysID); + if (entry == NULL) + return(NULL); + if (entry->type == SGML_CATA_SYSTEM) + return(entry->URL); + return(NULL); +} + +/** + * xmlCatalogSGMLResolve: + * @catal: the SGML catalog + * @pubId: the public ID string + * @sysId: the system ID string + * + * Do a complete resolution lookup of an External Identifier + * + * Returns the URI of the resource or NULL if not found + */ +static const xmlChar * +xmlCatalogSGMLResolve(xmlCatalogPtr catal, const xmlChar *pubID, + const xmlChar *sysID) { + const xmlChar *ret = NULL; + + if (catal->sgml == NULL) + return(NULL); + + if (pubID != NULL) + ret = xmlCatalogGetSGMLPublic(catal->sgml, pubID); + if (ret != NULL) + return(ret); + if (sysID != NULL) + ret = xmlCatalogGetSGMLSystem(catal->sgml, sysID); + return(NULL); +} + +/************************************************************************ + * * + * Specific Public interfaces * + * * + ************************************************************************/ + +/** + * xmlLoadSGMLSuperCatalog: + * @filename: a file path + * + * Load an SGML super catalog. It won't expand CATALOG or DELEGATE + * references. This is only needed for manipulating SGML Super Catalogs + * like adding and removing CATALOG or DELEGATE entries. 
+ * + * Returns the catalog parsed or NULL in case of error + */ +xmlCatalogPtr +xmlLoadSGMLSuperCatalog(const char *filename) +{ + xmlChar *content; + xmlCatalogPtr catal; + int ret; + + content = xmlLoadFileContent(filename); + if (content == NULL) + return(NULL); + + catal = xmlCreateNewCatalog(XML_SGML_CATALOG_TYPE, xmlCatalogDefaultPrefer); + if (catal == NULL) { + xmlFree(content); + return(NULL); + } + + ret = xmlParseSGMLCatalog(catal, content, filename, 1); + xmlFree(content); + if (ret < 0) { + xmlFreeCatalog(catal); + return(NULL); + } + return (catal); +} + +/** + * xmlLoadACatalog: + * @filename: a file path + * + * Load the catalog and build the associated data structures. + * This can be either an XML Catalog or an SGML Catalog + * It will recurse in SGML CATALOG entries. On the other hand XML + * Catalogs are not handled recursively. + * + * Returns the catalog parsed or NULL in case of error + */ +xmlCatalogPtr +xmlLoadACatalog(const char *filename) +{ + xmlChar *content; + xmlChar *first; + xmlCatalogPtr catal; + int ret; + + content = xmlLoadFileContent(filename); + if (content == NULL) + return(NULL); + + + first = content; + + while ((*first != 0) && (*first != '-') && (*first != '<') && + (!(((*first >= 'A') && (*first <= 'Z')) || + ((*first >= 'a') && (*first <= 'z'))))) + first++; + + if (*first != '<') { + catal = xmlCreateNewCatalog(XML_SGML_CATALOG_TYPE, xmlCatalogDefaultPrefer); + if (catal == NULL) { + xmlFree(content); + return(NULL); + } + ret = xmlParseSGMLCatalog(catal, content, filename, 0); + if (ret < 0) { + xmlFreeCatalog(catal); + xmlFree(content); + return(NULL); + } + } else { + catal = xmlCreateNewCatalog(XML_XML_CATALOG_TYPE, xmlCatalogDefaultPrefer); + if (catal == NULL) { + xmlFree(content); + return(NULL); + } + catal->xml = xmlNewCatalogEntry(XML_CATA_CATALOG, NULL, + NULL, BAD_CAST filename, xmlCatalogDefaultPrefer); + } + xmlFree(content); + return (catal); +} + +/** + * xmlExpandCatalog: + * @catal: a catalog + * 
@filename: a file path + * + * Load the catalog and expand the existing catal structure. + * This can be either an XML Catalog or an SGML Catalog + * + * Returns 0 in case of success, -1 in case of error + */ +static int +xmlExpandCatalog(xmlCatalogPtr catal, const char *filename) +{ + int ret; + + if ((catal == NULL) || (filename == NULL)) + return(-1); + + + if (catal->type == XML_SGML_CATALOG_TYPE) { + xmlChar *content; + + content = xmlLoadFileContent(filename); + if (content == NULL) + return(-1); + + ret = xmlParseSGMLCatalog(catal, content, filename, 0); + if (ret < 0) { + xmlFree(content); + return(-1); + } + xmlFree(content); + } else { + xmlCatalogEntryPtr tmp, cur; + tmp = xmlNewCatalogEntry(XML_CATA_CATALOG, NULL, + NULL, BAD_CAST filename, xmlCatalogDefaultPrefer); + + cur = catal->xml; + if (cur == NULL) { + catal->xml = tmp; + } else { + while (cur->next != NULL) cur = cur->next; + cur->next = tmp; + } + } + return (0); +} + +/** + * xmlACatalogResolveSystem: + * @catal: a Catalog + * @sysID: the public ID string + * + * Try to lookup the catalog resource for a system ID + * + * Returns the system ID if found or NULL otherwise, the value returned + * must be freed by the caller. 
+ */ +xmlChar * +xmlACatalogResolveSystem(xmlCatalogPtr catal, const xmlChar *sysID) { + xmlChar *ret = NULL; + + if ((sysID == NULL) || (catal == NULL)) + return(NULL); + + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Resolve sysID %s\n", sysID); + + if (catal->type == XML_XML_CATALOG_TYPE) { + ret = xmlCatalogListXMLResolve(catal->xml, NULL, sysID); + if (ret == XML_CATAL_BREAK) + ret = NULL; + } else { + const xmlChar *sgml; + + sgml = xmlCatalogGetSGMLSystem(catal->sgml, sysID); + if (sgml != NULL) + ret = xmlStrdup(sgml); + } + return(ret); +} + +/** + * xmlACatalogResolvePublic: + * @catal: a Catalog + * @pubID: the public ID string + * + * Try to lookup the system ID associated to a public ID in that catalog + * + * Returns the system ID if found or NULL otherwise, the value returned + * must be freed by the caller. + */ +xmlChar * +xmlACatalogResolvePublic(xmlCatalogPtr catal, const xmlChar *pubID) { + xmlChar *ret = NULL; + + if ((pubID == NULL) || (catal == NULL)) + return(NULL); + + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Resolve pubID %s\n", pubID); + + if (catal->type == XML_XML_CATALOG_TYPE) { + ret = xmlCatalogListXMLResolve(catal->xml, pubID, NULL); + if (ret == XML_CATAL_BREAK) + ret = NULL; + } else { + const xmlChar *sgml; + + sgml = xmlCatalogGetSGMLPublic(catal->sgml, pubID); + if (sgml != NULL) + ret = xmlStrdup(sgml); + } + return(ret); +} + +/** + * xmlACatalogResolve: + * @catal: a Catalog + * @pubID: the public ID string + * @sysID: the system ID string + * + * Do a complete resolution lookup of an External Identifier + * + * Returns the URI of the resource or NULL if not found, it must be freed + * by the caller. 
+ */ +xmlChar * +xmlACatalogResolve(xmlCatalogPtr catal, const xmlChar * pubID, + const xmlChar * sysID) +{ + xmlChar *ret = NULL; + + if (((pubID == NULL) && (sysID == NULL)) || (catal == NULL)) + return (NULL); + + if (xmlDebugCatalogs) { + if (pubID != NULL) { + xmlGenericError(xmlGenericErrorContext, + "Resolve: pubID %s\n", pubID); + } else { + xmlGenericError(xmlGenericErrorContext, + "Resolve: sysID %s\n", sysID); + } + } + + if (catal->type == XML_XML_CATALOG_TYPE) { + ret = xmlCatalogListXMLResolve(catal->xml, pubID, sysID); + if (ret == XML_CATAL_BREAK) + ret = NULL; + } else { + const xmlChar *sgml; + + sgml = xmlCatalogSGMLResolve(catal, pubID, sysID); + if (sgml != NULL) + ret = xmlStrdup(sgml); + } + return (ret); +} + +/** + * xmlACatalogResolveURI: + * @catal: a Catalog + * @URI: the URI + * + * Do a complete resolution lookup of an URI + * + * Returns the URI of the resource or NULL if not found, it must be freed + * by the caller. + */ +xmlChar * +xmlACatalogResolveURI(xmlCatalogPtr catal, const xmlChar *URI) { + xmlChar *ret = NULL; + + if ((URI == NULL) || (catal == NULL)) + return(NULL); + + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Resolve URI %s\n", URI); + + if (catal->type == XML_XML_CATALOG_TYPE) { + ret = xmlCatalogListXMLResolveURI(catal->xml, URI); + if (ret == XML_CATAL_BREAK) + ret = NULL; + } else { + const xmlChar *sgml; + + sgml = xmlCatalogSGMLResolve(catal, NULL, URI); + if (sgml != NULL) + sgml = xmlStrdup(sgml); + } + return(ret); +} + +/** + * xmlACatalogDump: + * @catal: a Catalog + * @out: the file. 
+ * + * Free up all the memory associated with catalogs + */ +void +xmlACatalogDump(xmlCatalogPtr catal, FILE *out) { + if ((out == NULL) || (catal == NULL)) + return; + + if (catal->type == XML_XML_CATALOG_TYPE) { + xmlDumpXMLCatalog(out, catal->xml); + } else { + xmlHashScan(catal->sgml, + (xmlHashScanner) xmlCatalogDumpEntry, out); + } +} + +/** + * xmlACatalogAdd: + * @catal: a Catalog + * @type: the type of record to add to the catalog + * @orig: the system, public or prefix to match + * @replace: the replacement value for the match + * + * Add an entry in the catalog, it may overwrite existing but + * different entries. + * + * Returns 0 if successful, -1 otherwise + */ +int +xmlACatalogAdd(xmlCatalogPtr catal, const xmlChar * type, + const xmlChar * orig, const xmlChar * replace) +{ + int res = -1; + + if (catal == NULL) + return(-1); + + if (catal->type == XML_XML_CATALOG_TYPE) { + res = xmlAddXMLCatalog(catal->xml, type, orig, replace); + } else { + xmlCatalogEntryType cattype; + + cattype = xmlGetSGMLCatalogEntryType(type); + if (cattype != XML_CATA_NONE) { + xmlCatalogEntryPtr entry; + + entry = xmlNewCatalogEntry(cattype, orig, replace, NULL, + XML_CATA_PREFER_NONE); + if (catal->sgml == NULL) + catal->sgml = xmlHashCreate(10); + res = xmlHashAddEntry(catal->sgml, orig, entry); + } + } + return (res); +} + +/** + * xmlACatalogRemove: + * @catal: a Catalog + * @value: the value to remove + * + * Remove an entry from the catalog + * + * Returns the number of entries removed if successful, -1 otherwise + */ +int +xmlACatalogRemove(xmlCatalogPtr catal, const xmlChar *value) { + int res = -1; + + if ((catal == NULL) || (value == NULL)) + return(-1); + + if (catal->type == XML_XML_CATALOG_TYPE) { + res = xmlDelXMLCatalog(catal->xml, value); + } else { + res = xmlHashRemoveEntry(catal->sgml, value, + (xmlHashDeallocator) xmlFreeCatalogEntry); + if (res == 0) + res = 1; + } + return(res); +} + +/** + * xmlNewCatalog: + * @sgml: should this create an SGML 
catalog + * + * create a new Catalog. + * + * Returns the xmlCatalogPtr or NULL in case of error + */ +xmlCatalogPtr +xmlNewCatalog(int sgml) { + xmlCatalogPtr catal = NULL; + + if (sgml) { + catal = xmlCreateNewCatalog(XML_SGML_CATALOG_TYPE, + xmlCatalogDefaultPrefer); + if ((catal != NULL) && (catal->sgml == NULL)) + catal->sgml = xmlHashCreate(10); + } else + catal = xmlCreateNewCatalog(XML_XML_CATALOG_TYPE, + xmlCatalogDefaultPrefer); + return(catal); +} + +/** + * xmlCatalogIsEmpty: + * @catal: should this create an SGML catalog + * + * Check is a catalog is empty + * + * Returns 1 if the catalog is empty, 0 if not, amd -1 in case of error. + */ +int +xmlCatalogIsEmpty(xmlCatalogPtr catal) { + if (catal == NULL) + return(-1); + + if (catal->type == XML_XML_CATALOG_TYPE) { + if (catal->xml == NULL) + return(1); + if ((catal->xml->type != XML_CATA_CATALOG) && + (catal->xml->type != XML_CATA_BROKEN_CATALOG)) + return(-1); + if (catal->xml->children == NULL) + return(1); + return(0); + } else { + int res; + + if (catal->sgml == NULL) + return(1); + res = xmlHashSize(catal->sgml); + if (res == 0) + return(1); + if (res < 0) + return(-1); + } + return(0); +} + +/************************************************************************ + * * + * Public interfaces manipulating the global shared default catalog * + * * + ************************************************************************/ + +/** + * xmlInitializeCatalogData: + * + * Do the catalog initialization only of global data, doesn't try to load + * any catalog actually. + * this function is not thread safe, catalog initialization should + * preferably be done once at startup + */ +static void +xmlInitializeCatalogData(void) { + if (xmlCatalogInitialized != 0) + return; + + if (getenv("XML_DEBUG_CATALOG")) + xmlDebugCatalogs = 1; + xmlCatalogMutex = xmlNewRMutex(); + + xmlCatalogInitialized = 1; +} +/** + * xmlInitializeCatalog: + * + * Do the catalog initialization. 
+ * this function is not thread safe, catalog initialization should + * preferably be done once at startup + */ +void +xmlInitializeCatalog(void) { + if (xmlCatalogInitialized != 0) + return; + + xmlInitializeCatalogData(); + xmlRMutexLock(xmlCatalogMutex); + + if (getenv("XML_DEBUG_CATALOG")) + xmlDebugCatalogs = 1; + + if (xmlDefaultCatalog == NULL) { + const char *catalogs; + char *path; + const char *cur, *paths; + xmlCatalogPtr catal; + xmlCatalogEntryPtr *nextent; + + catalogs = (const char *) getenv("XML_CATALOG_FILES"); + if (catalogs == NULL) + catalogs = XML_XML_DEFAULT_CATALOG; + + catal = xmlCreateNewCatalog(XML_XML_CATALOG_TYPE, + xmlCatalogDefaultPrefer); + if (catal != NULL) { + /* the XML_CATALOG_FILES envvar is allowed to contain a + space-separated list of entries. */ + cur = catalogs; + nextent = &catal->xml; + while (*cur != '\0') { + while (IS_BLANK(*cur)) + cur++; + if (*cur != 0) { + paths = cur; + while ((*cur != 0) && (!IS_BLANK(*cur))) + cur++; + path = (char *) xmlStrndup((const xmlChar *)paths, cur - paths); + if (path != NULL) { + *nextent = xmlNewCatalogEntry(XML_CATA_CATALOG, NULL, + NULL, BAD_CAST path, xmlCatalogDefaultPrefer); + if (*nextent != NULL) + nextent = &((*nextent)->next); + xmlFree(path); + } + } + } + xmlDefaultCatalog = catal; + } + } + + xmlRMutexUnlock(xmlCatalogMutex); +} + + +/** + * xmlLoadCatalog: + * @filename: a file path + * + * Load the catalog and makes its definitions effective for the default + * external entity loader. It will recurse in SGML CATALOG entries. 
+ * this function is not thread safe, catalog initialization should + * preferably be done once at startup + * + * Returns 0 in case of success -1 in case of error + */ +int +xmlLoadCatalog(const char *filename) +{ + int ret; + xmlCatalogPtr catal; + + if (!xmlCatalogInitialized) + xmlInitializeCatalogData(); + + xmlRMutexLock(xmlCatalogMutex); + + if (xmlDefaultCatalog == NULL) { + catal = xmlLoadACatalog(filename); + if (catal == NULL) + return(-1); + + xmlDefaultCatalog = catal; + xmlRMutexUnlock(xmlCatalogMutex); + return(0); + } + + ret = xmlExpandCatalog(xmlDefaultCatalog, filename); + xmlRMutexUnlock(xmlCatalogMutex); + return(ret); +} + +/** + * xmlLoadCatalogs: + * @pathss: a list of directories separated by a colon or a space. + * + * Load the catalogs and makes their definitions effective for the default + * external entity loader. + * this function is not thread safe, catalog initialization should + * preferably be done once at startup + */ +void +xmlLoadCatalogs(const char *pathss) { + const char *cur; + const char *paths; + xmlChar *path; + + if (pathss == NULL) + return; + + cur = pathss; + while ((cur != NULL) && (*cur != 0)) { + while (IS_BLANK(*cur)) cur++; + if (*cur != 0) { + paths = cur; + while ((*cur != 0) && (*cur != ':') && (!IS_BLANK(*cur))) + cur++; + path = xmlStrndup((const xmlChar *)paths, cur - paths); + if (path != NULL) { + xmlLoadCatalog((const char *) path); + xmlFree(path); + } + } + while (*cur == ':') + cur++; + } +} + +/** + * xmlCatalogCleanup: + * + * Free up all the memory associated with catalogs + */ +void +xmlCatalogCleanup(void) { + if (xmlCatalogInitialized == 0) + return; + + xmlRMutexLock(xmlCatalogMutex); + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Catalogs cleanup\n"); + if (xmlCatalogXMLFiles != NULL) + xmlHashFree(xmlCatalogXMLFiles, + (xmlHashDeallocator)xmlFreeCatalogHashEntryList); + xmlCatalogXMLFiles = NULL; + if (xmlDefaultCatalog != NULL) + xmlFreeCatalog(xmlDefaultCatalog); + 
xmlDefaultCatalog = NULL; + xmlDebugCatalogs = 0; + xmlCatalogInitialized = 0; + xmlRMutexUnlock(xmlCatalogMutex); + xmlFreeRMutex(xmlCatalogMutex); +} + +/** + * xmlCatalogResolveSystem: + * @sysID: the public ID string + * + * Try to lookup the catalog resource for a system ID + * + * Returns the system ID if found or NULL otherwise, the value returned + * must be freed by the caller. + */ +xmlChar * +xmlCatalogResolveSystem(const xmlChar *sysID) { + xmlChar *ret; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + ret = xmlACatalogResolveSystem(xmlDefaultCatalog, sysID); + return(ret); +} + +/** + * xmlCatalogResolvePublic: + * @pubID: the public ID string + * + * Try to lookup the system ID associated to a public ID + * + * Returns the system ID if found or NULL otherwise, the value returned + * must be freed by the caller. + */ +xmlChar * +xmlCatalogResolvePublic(const xmlChar *pubID) { + xmlChar *ret; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + ret = xmlACatalogResolvePublic(xmlDefaultCatalog, pubID); + return(ret); +} + +/** + * xmlCatalogResolve: + * @pubID: the public ID string + * @sysID: the system ID string + * + * Do a complete resolution lookup of an External Identifier + * + * Returns the URI of the resource or NULL if not found, it must be freed + * by the caller. + */ +xmlChar * +xmlCatalogResolve(const xmlChar *pubID, const xmlChar *sysID) { + xmlChar *ret; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + ret = xmlACatalogResolve(xmlDefaultCatalog, pubID, sysID); + return(ret); +} + +/** + * xmlCatalogResolveURI: + * @URI: the URI + * + * Do a complete resolution lookup of an URI + * + * Returns the URI of the resource or NULL if not found, it must be freed + * by the caller. 
+ */ +xmlChar * +xmlCatalogResolveURI(const xmlChar *URI) { + xmlChar *ret; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + ret = xmlACatalogResolveURI(xmlDefaultCatalog, URI); + return(ret); +} + +/** + * xmlCatalogDump: + * @out: the file. + * + * Free up all the memory associated with catalogs + */ +void +xmlCatalogDump(FILE *out) { + if (out == NULL) + return; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + xmlACatalogDump(xmlDefaultCatalog, out); +} + +/** + * xmlCatalogAdd: + * @type: the type of record to add to the catalog + * @orig: the system, public or prefix to match + * @replace: the replacement value for the match + * + * Add an entry in the catalog, it may overwrite existing but + * different entries. + * If called before any other catalog routine, allows to override the + * default shared catalog put in place by xmlInitializeCatalog(); + * + * Returns 0 if successful, -1 otherwise + */ +int +xmlCatalogAdd(const xmlChar *type, const xmlChar *orig, const xmlChar *replace) { + int res = -1; + + if (!xmlCatalogInitialized) + xmlInitializeCatalogData(); + + xmlRMutexLock(xmlCatalogMutex); + /* + * Specific case where one want to override the default catalog + * put in place by xmlInitializeCatalog(); + */ + if ((xmlDefaultCatalog == NULL) && + (xmlStrEqual(type, BAD_CAST "catalog"))) { + xmlDefaultCatalog = xmlCreateNewCatalog(XML_XML_CATALOG_TYPE, + xmlCatalogDefaultPrefer); + xmlDefaultCatalog->xml = xmlNewCatalogEntry(XML_CATA_CATALOG, NULL, + orig, NULL, xmlCatalogDefaultPrefer); + + xmlRMutexUnlock(xmlCatalogMutex); + return(0); + } + + res = xmlACatalogAdd(xmlDefaultCatalog, type, orig, replace); + xmlRMutexUnlock(xmlCatalogMutex); + return(res); +} + +/** + * xmlCatalogRemove: + * @value: the value to remove + * + * Remove an entry from the catalog + * + * Returns the number of entries removed if successful, -1 otherwise + */ +int +xmlCatalogRemove(const xmlChar *value) { + int res; + + if (!xmlCatalogInitialized) + 
xmlInitializeCatalog(); + + xmlRMutexLock(xmlCatalogMutex); + res = xmlACatalogRemove(xmlDefaultCatalog, value); + xmlRMutexUnlock(xmlCatalogMutex); + return(res); +} + +/** + * xmlCatalogConvert: + * + * Convert all the SGML catalog entries as XML ones + * + * Returns the number of entries converted if successful, -1 otherwise + */ +int +xmlCatalogConvert(void) { + int res = -1; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + xmlRMutexLock(xmlCatalogMutex); + res = xmlConvertSGMLCatalog(xmlDefaultCatalog); + xmlRMutexUnlock(xmlCatalogMutex); + return(res); +} + +/************************************************************************ + * * + * Public interface manipulating the common preferences * + * * + ************************************************************************/ + +/** + * xmlCatalogGetDefaults: + * + * Used to get the user preference w.r.t. to what catalogs should + * be accepted + * + * Returns the current xmlCatalogAllow value + */ +xmlCatalogAllow +xmlCatalogGetDefaults(void) { + return(xmlCatalogDefaultAllow); +} + +/** + * xmlCatalogSetDefaults: + * @allow: what catalogs should be accepted + * + * Used to set the user preference w.r.t. 
to what catalogs should + * be accepted + */ +void +xmlCatalogSetDefaults(xmlCatalogAllow allow) { + if (xmlDebugCatalogs) { + switch (allow) { + case XML_CATA_ALLOW_NONE: + xmlGenericError(xmlGenericErrorContext, + "Disabling catalog usage\n"); + break; + case XML_CATA_ALLOW_GLOBAL: + xmlGenericError(xmlGenericErrorContext, + "Allowing only global catalogs\n"); + break; + case XML_CATA_ALLOW_DOCUMENT: + xmlGenericError(xmlGenericErrorContext, + "Allowing only catalogs from the document\n"); + break; + case XML_CATA_ALLOW_ALL: + xmlGenericError(xmlGenericErrorContext, + "Allowing all catalogs\n"); + break; + } + } + xmlCatalogDefaultAllow = allow; +} + +/** + * xmlCatalogSetDefaultPrefer: + * @prefer: the default preference for delegation + * + * Allows to set the preference between public and system for deletion + * in XML Catalog resolution. C.f. section 4.1.1 of the spec + * Values accepted are XML_CATA_PREFER_PUBLIC or XML_CATA_PREFER_SYSTEM + * + * Returns the previous value of the default preference for delegation + */ +xmlCatalogPrefer +xmlCatalogSetDefaultPrefer(xmlCatalogPrefer prefer) { + xmlCatalogPrefer ret = xmlCatalogDefaultPrefer; + + if (prefer == XML_CATA_PREFER_NONE) + return(ret); + + if (xmlDebugCatalogs) { + switch (prefer) { + case XML_CATA_PREFER_PUBLIC: + xmlGenericError(xmlGenericErrorContext, + "Setting catalog preference to PUBLIC\n"); + break; + case XML_CATA_PREFER_SYSTEM: + xmlGenericError(xmlGenericErrorContext, + "Setting catalog preference to SYSTEM\n"); + break; + case XML_CATA_PREFER_NONE: + break; + } + } + xmlCatalogDefaultPrefer = prefer; + return(ret); +} + +/** + * xmlCatalogSetDebug: + * @level: the debug level of catalogs required + * + * Used to set the debug level for catalog operation, 0 disable + * debugging, 1 enable it + * + * Returns the previous value of the catalog debugging level + */ +int +xmlCatalogSetDebug(int level) { + int ret = xmlDebugCatalogs; + + if (level <= 0) + xmlDebugCatalogs = 0; + else + 
xmlDebugCatalogs = level; + return(ret); +} + +/************************************************************************ + * * + * Minimal interfaces used for per-document catalogs by the parser * + * * + ************************************************************************/ + +/** + * xmlCatalogFreeLocal: + * @catalogs: a document's list of catalogs + * + * Free up the memory associated to the catalog list + */ +void +xmlCatalogFreeLocal(void *catalogs) { + xmlCatalogEntryPtr catal; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + catal = (xmlCatalogEntryPtr) catalogs; + if (catal != NULL) + xmlFreeCatalogEntryList(catal); +} + + +/** + * xmlCatalogAddLocal: + * @catalogs: a document's list of catalogs + * @URL: the URL to a new local catalog + * + * Add the new entry to the catalog list + * + * Returns the updated list + */ +void * +xmlCatalogAddLocal(void *catalogs, const xmlChar *URL) { + xmlCatalogEntryPtr catal, add; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + if (URL == NULL) + return(catalogs); + + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Adding document catalog %s\n", URL); + + add = xmlNewCatalogEntry(XML_CATA_CATALOG, NULL, URL, NULL, + xmlCatalogDefaultPrefer); + if (add == NULL) + return(catalogs); + + catal = (xmlCatalogEntryPtr) catalogs; + if (catal == NULL) + return((void *) add); + + while (catal->next != NULL) + catal = catal->next; + catal->next = add; + return(catalogs); +} + +/** + * xmlCatalogLocalResolve: + * @catalogs: a document's list of catalogs + * @pubID: the public ID string + * @sysID: the system ID string + * + * Do a complete resolution lookup of an External Identifier using a + * document's private catalog list + * + * Returns the URI of the resource or NULL if not found, it must be freed + * by the caller. 
+ */ +xmlChar * +xmlCatalogLocalResolve(void *catalogs, const xmlChar *pubID, + const xmlChar *sysID) { + xmlCatalogEntryPtr catal; + xmlChar *ret; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + if ((pubID == NULL) && (sysID == NULL)) + return(NULL); + + if (xmlDebugCatalogs) { + if (pubID != NULL) { + xmlGenericError(xmlGenericErrorContext, + "Local resolve: pubID %s\n", pubID); + } else { + xmlGenericError(xmlGenericErrorContext, + "Local resolve: sysID %s\n", sysID); + } + } + + catal = (xmlCatalogEntryPtr) catalogs; + if (catal == NULL) + return(NULL); + ret = xmlCatalogListXMLResolve(catal, pubID, sysID); + if ((ret != NULL) && (ret != XML_CATAL_BREAK)) + return(ret); + return(NULL); +} + +/** + * xmlCatalogLocalResolveURI: + * @catalogs: a document's list of catalogs + * @URI: the URI + * + * Do a complete resolution lookup of an URI using a + * document's private catalog list + * + * Returns the URI of the resource or NULL if not found, it must be freed + * by the caller. + */ +xmlChar * +xmlCatalogLocalResolveURI(void *catalogs, const xmlChar *URI) { + xmlCatalogEntryPtr catal; + xmlChar *ret; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + if (URI == NULL) + return(NULL); + + if (xmlDebugCatalogs) + xmlGenericError(xmlGenericErrorContext, + "Resolve URI %s\n", URI); + + catal = (xmlCatalogEntryPtr) catalogs; + if (catal == NULL) + return(NULL); + ret = xmlCatalogListXMLResolveURI(catal, URI); + if ((ret != NULL) && (ret != XML_CATAL_BREAK)) + return(ret); + return(NULL); +} + +/************************************************************************ + * * + * Deprecated interfaces * + * * + ************************************************************************/ +/** + * xmlCatalogGetSystem: + * @sysID: the system ID string + * + * Try to lookup the system ID associated to a public ID + * DEPRECATED, use xmlCatalogResolveSystem() + * + * Returns the system ID if found or NULL otherwise. 
+ */ +const xmlChar * +xmlCatalogGetSystem(const xmlChar *sysID) { + xmlChar *ret; + static xmlChar result[1000]; + static int msg = 0; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + if (msg == 0) { + xmlGenericError(xmlGenericErrorContext, + "Use of deprecated xmlCatalogGetSystem() call\n"); + msg++; + } + + if (sysID == NULL) + return(NULL); + + /* + * Check first the XML catalogs + */ + if (xmlDefaultCatalog != NULL) { + ret = xmlCatalogListXMLResolve(xmlDefaultCatalog->xml, NULL, sysID); + if ((ret != NULL) && (ret != XML_CATAL_BREAK)) { + snprintf((char *) result, sizeof(result) - 1, "%s", (char *) ret); + result[sizeof(result) - 1] = 0; + return(result); + } + } + + if (xmlDefaultCatalog != NULL) + return(xmlCatalogGetSGMLSystem(xmlDefaultCatalog->sgml, sysID)); + return(NULL); +} + +/** + * xmlCatalogGetPublic: + * @pubID: the public ID string + * + * Try to lookup the system ID associated to a public ID + * DEPRECATED, use xmlCatalogResolvePublic() + * + * Returns the system ID if found or NULL otherwise. 
+ */ +const xmlChar * +xmlCatalogGetPublic(const xmlChar *pubID) { + xmlChar *ret; + static xmlChar result[1000]; + static int msg = 0; + + if (!xmlCatalogInitialized) + xmlInitializeCatalog(); + + if (msg == 0) { + xmlGenericError(xmlGenericErrorContext, + "Use of deprecated xmlCatalogGetPublic() call\n"); + msg++; + } + + if (pubID == NULL) + return(NULL); + + /* + * Check first the XML catalogs + */ + if (xmlDefaultCatalog != NULL) { + ret = xmlCatalogListXMLResolve(xmlDefaultCatalog->xml, pubID, NULL); + if ((ret != NULL) && (ret != XML_CATAL_BREAK)) { + snprintf((char *) result, sizeof(result) - 1, "%s", (char *) ret); + result[sizeof(result) - 1] = 0; + return(result); + } + } + + if (xmlDefaultCatalog != NULL) + return(xmlCatalogGetSGMLPublic(xmlDefaultCatalog->sgml, pubID)); + return(NULL); +} + +#endif /* LIBXML_CATALOG_ENABLED */ diff --git a/bundle/libxml/config.m4 b/bundle/libxml/config.m4 new file mode 100644 index 0000000000..4faadda0ac --- /dev/null +++ b/bundle/libxml/config.m4 @@ -0,0 +1,80 @@ +dnl +dnl $Id$ +dnl + +PHP_ARG_BUNDLE(bundle-libxml, Whether to bundle the libxml2 library, +[ --without-bundle-libxml Disable the bundled libxml2 library], yes) + +AC_DEFUN(PHP_LIBXML_CHECK_VERSION,[ + old_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$LIBXML_DIR/include$LIBXML_ADD + AC_MSG_CHECKING(for libxml version) + AC_EGREP_CPP(yes,[ +#include <libxml/xmlversion.h> +#if LIBXML_VERSION >= 20414 + yes +#endif + ],[ + AC_MSG_RESULT(>= 2.4.14) + ],[ + AC_MSG_ERROR(libxml version 2.4.14 or greater required.) 
+ ]) + CPPFLAGS=$old_CPPFLAGS +]) + +if test "$PHP_BUNDLE_LIBXML" = "yes"; then + AC_DEFINE(HAVE_LIBXML, 1, [ ]) + AC_DEFINE(HAVE_LIBXML_BUNDLED, 1, [ ]) + PHP_ADD_SOURCES(bundle/libxml, DOCBparser.c encoding.c nanohttp.c valid.c xmlreader.c \ + HTMLparser.c entities.c parser.c threads.c xinclude.c xmlregexp.c \ + HTMLtree.c error.c parserInternals.c tree.c xlink.c xmlschemas.c \ + SAX.c globals.c trio.c xmlIO.c xmlschemastypes.c \ + c14n.c hash.c trionan.c xmlunicode.c debugXML.c \ + catalog.c list.c triostr.c xpath.c \ + nanoftp.c uri.c xmlmemory.c xpointer.c) + PHP_ADD_INCLUDE(bundle/libxml/include/) + PHP_ADD_INCLUDE(bundle/libxml/) +elif test "$PHP_BUNDLE_LIBXML" != "no"; then + for i in $PHP_BUNDLE_LIBXML; do + if test -r $i/include/libxml/tree.h; then + LIBXML_DIR=$i + elif test -r $i/include/libxml2/libxml/tree.h; then + LIBXML_DIR=$i + LIBXML_ADD="/libxml2" + fi + done + + if test -z "$LIBXML_DIR"; then + AC_MSG_RESULT(not found) + AC_MSG_ERROR(Please reinstall the libxml >= 2.4.14 distribution) + fi + + PHP_LIBXML_CHECK_VERSION + + if test -r $LIBXML_DIR/lib/libxml2.a -o -f $LIBXML_DIR/lib/libxml2.$SHLIB_SUFFIX_NAMEE; then + LIBXML_LIBNAME=xml2 + else + LIBXML_LIBNAME=xml + fi + + XML2_CONFIG=$LIBXML_DIR/bin/xml2-config + + if test -x $XML2_CONFIG; then + LIBXML_LIBS=`$XML2_CONFIG --libs` + PHP_EVAL_LIBLINE($LIBXML_LIBS, LIBXML_SHARED_LIBADD) + else + PHP_ADD_LIBRARY_WITH_PATH($LIBXML_LIBNAME, $LIBXML_DIR/lib, LIBXML_SHARED_LIBADD) + fi + + PHP_ADD_INCLUDE($LIBXML_DIR/include$LIBXML_ADD) + + if test "$PHP_ZLIB_DIR" = "no"; then + AC_MSG_ERROR(libxml requires zlib. 
Use --with-zlib-dir=<DIR>) + else + PHP_ADD_LIBRARY_WITH_PATH(z, $PHP_ZLIB_DIR/lib, LIBXML_SHARED_LIBADD) + fi + + AC_DEFINE(HAVE_LIBXML, 1, [ ]) + PHP_SUBST(LIBXML_SHARED_LIBADD) +fi + diff --git a/bundle/libxml/debugXML.c b/bundle/libxml/debugXML.c new file mode 100644 index 0000000000..07a016861a --- /dev/null +++ b/bundle/libxml/debugXML.c @@ -0,0 +1,2480 @@ +/* + * debugXML.c : This is a set of routines used for debugging the tree + * produced by the XML parser. + * + * See Copyright for the status of this software. + * + * Daniel Veillard <daniel@veillard.com> + */ + +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_DEBUG_ENABLED + +#include <string.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/valid.h> +#include <libxml/debugXML.h> +#include <libxml/HTMLtree.h> +#include <libxml/HTMLparser.h> +#include <libxml/xmlerror.h> +#include <libxml/globals.h> +#include <libxml/xpathInternals.h> + +/** + * xmlDebugDumpString: + * @output: the FILE * for the output + * @str: the string + * + * Dumps informations about the string, shorten it if necessary + */ +void +xmlDebugDumpString(FILE * output, const xmlChar * str) +{ + int i; + + if (output == NULL) + output = stdout; + if (str == NULL) { + fprintf(output, "(NULL)"); + return; + } + for (i = 0; i < 40; i++) + if (str[i] == 0) + return; + else if (IS_BLANK(str[i])) + fputc(' ', output); + else if (str[i] >= 0x80) + fprintf(output, "#%X", str[i]); + else + fputc(str[i], output); + fprintf(output, "..."); +} + +static void +xmlDebugDumpDtdNode(FILE *output, xmlDtdPtr dtd, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (dtd == NULL) { + fprintf(output, "DTD node is 
NULL\n"); + return; + } + + if (dtd->type != XML_DTD_NODE) { + fprintf(output, "PBM: not a DTD\n"); + return; + } + if (dtd->name != NULL) + fprintf(output, "DTD(%s)", dtd->name); + else + fprintf(output, "DTD"); + if (dtd->ExternalID != NULL) + fprintf(output, ", PUBLIC %s", dtd->ExternalID); + if (dtd->SystemID != NULL) + fprintf(output, ", SYSTEM %s", dtd->SystemID); + fprintf(output, "\n"); + /* + * Do a bit of checking + */ + if (dtd->parent == NULL) + fprintf(output, "PBM: DTD has no parent\n"); + if (dtd->doc == NULL) + fprintf(output, "PBM: DTD has no doc\n"); + if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc)) + fprintf(output, "PBM: DTD doc differs from parent's one\n"); + if (dtd->prev == NULL) { + if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd)) + fprintf(output, "PBM: DTD has no prev and not first of list\n"); + } else { + if (dtd->prev->next != (xmlNodePtr) dtd) + fprintf(output, "PBM: DTD prev->next : back link wrong\n"); + } + if (dtd->next == NULL) { + if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd)) + fprintf(output, "PBM: DTD has no next and not last of list\n"); + } else { + if (dtd->next->prev != (xmlNodePtr) dtd) + fprintf(output, "PBM: DTD next->prev : forward link wrong\n"); + } +} + +static void +xmlDebugDumpAttrDecl(FILE *output, xmlAttributePtr attr, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (attr == NULL) { + fprintf(output, "Attribute declaration is NULL\n"); + return; + } + if (attr->type != XML_ATTRIBUTE_DECL) { + fprintf(output, "PBM: not a Attr\n"); + return; + } + if (attr->name != NULL) + fprintf(output, "ATTRDECL(%s)", attr->name); + else + fprintf(output, "PBM ATTRDECL noname!!!"); + if (attr->elem != NULL) + fprintf(output, " for %s", attr->elem); + else + fprintf(output, " PBM noelem!!!"); + switch (attr->atype) { 
+ case XML_ATTRIBUTE_CDATA: + fprintf(output, " CDATA"); + break; + case XML_ATTRIBUTE_ID: + fprintf(output, " ID"); + break; + case XML_ATTRIBUTE_IDREF: + fprintf(output, " IDREF"); + break; + case XML_ATTRIBUTE_IDREFS: + fprintf(output, " IDREFS"); + break; + case XML_ATTRIBUTE_ENTITY: + fprintf(output, " ENTITY"); + break; + case XML_ATTRIBUTE_ENTITIES: + fprintf(output, " ENTITIES"); + break; + case XML_ATTRIBUTE_NMTOKEN: + fprintf(output, " NMTOKEN"); + break; + case XML_ATTRIBUTE_NMTOKENS: + fprintf(output, " NMTOKENS"); + break; + case XML_ATTRIBUTE_ENUMERATION: + fprintf(output, " ENUMERATION"); + break; + case XML_ATTRIBUTE_NOTATION: + fprintf(output, " NOTATION "); + break; + } + if (attr->tree != NULL) { + int indx; + xmlEnumerationPtr cur = attr->tree; + + for (indx = 0;indx < 5; indx++) { + if (indx != 0) + fprintf(output, "|%s", cur->name); + else + fprintf(output, " (%s", cur->name); + cur = cur->next; + if (cur == NULL) break; + } + if (cur == NULL) + fprintf(output, ")"); + else + fprintf(output, "...)"); + } + switch (attr->def) { + case XML_ATTRIBUTE_NONE: + break; + case XML_ATTRIBUTE_REQUIRED: + fprintf(output, " REQUIRED"); + break; + case XML_ATTRIBUTE_IMPLIED: + fprintf(output, " IMPLIED"); + break; + case XML_ATTRIBUTE_FIXED: + fprintf(output, " FIXED"); + break; + } + if (attr->defaultValue != NULL) { + fprintf(output, "\""); + xmlDebugDumpString(output, attr->defaultValue); + fprintf(output, "\""); + } + fprintf(output, "\n"); + + /* + * Do a bit of checking + */ + if (attr->parent == NULL) + fprintf(output, "PBM: Attr has no parent\n"); + if (attr->doc == NULL) + fprintf(output, "PBM: Attr has no doc\n"); + if ((attr->parent != NULL) && (attr->doc != attr->parent->doc)) + fprintf(output, "PBM: Attr doc differs from parent's one\n"); + if (attr->prev == NULL) { + if ((attr->parent != NULL) && (attr->parent->children != (xmlNodePtr)attr)) + fprintf(output, "PBM: Attr has no prev and not first of list\n"); + } else { + if (attr->prev->next 
!= (xmlNodePtr) attr) + fprintf(output, "PBM: Attr prev->next : back link wrong\n"); + } + if (attr->next == NULL) { + if ((attr->parent != NULL) && (attr->parent->last != (xmlNodePtr) attr)) + fprintf(output, "PBM: Attr has no next and not last of list\n"); + } else { + if (attr->next->prev != (xmlNodePtr) attr) + fprintf(output, "PBM: Attr next->prev : forward link wrong\n"); + } +} + +static void +xmlDebugDumpElemDecl(FILE *output, xmlElementPtr elem, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (elem == NULL) { + fprintf(output, "Element declaration is NULL\n"); + return; + } + if (elem->type != XML_ELEMENT_DECL) { + fprintf(output, "PBM: not a Elem\n"); + return; + } + if (elem->name != NULL) { + fprintf(output, "ELEMDECL("); + xmlDebugDumpString(output, elem->name); + fprintf(output, ")"); + } else + fprintf(output, "PBM ELEMDECL noname!!!"); + switch (elem->etype) { + case XML_ELEMENT_TYPE_UNDEFINED: + fprintf(output, ", UNDEFINED"); + break; + case XML_ELEMENT_TYPE_EMPTY: + fprintf(output, ", EMPTY"); + break; + case XML_ELEMENT_TYPE_ANY: + fprintf(output, ", ANY"); + break; + case XML_ELEMENT_TYPE_MIXED: + fprintf(output, ", MIXED "); + break; + case XML_ELEMENT_TYPE_ELEMENT: + fprintf(output, ", MIXED "); + break; + } + if ((elem->type != XML_ELEMENT_NODE) && + (elem->content != NULL)) { + char buf[5001]; + + buf[0] = 0; + xmlSnprintfElementContent(buf, 5000, elem->content, 1); + buf[5000] = 0; + fprintf(output, "%s", buf); + } + fprintf(output, "\n"); + + /* + * Do a bit of checking + */ + if (elem->parent == NULL) + fprintf(output, "PBM: Elem has no parent\n"); + if (elem->doc == NULL) + fprintf(output, "PBM: Elem has no doc\n"); + if ((elem->parent != NULL) && (elem->doc != elem->parent->doc)) + fprintf(output, "PBM: Elem doc differs from parent's one\n"); + if (elem->prev == NULL) { + if 
((elem->parent != NULL) && (elem->parent->children != (xmlNodePtr)elem)) + fprintf(output, "PBM: Elem has no prev and not first of list\n"); + } else { + if (elem->prev->next != (xmlNodePtr) elem) + fprintf(output, "PBM: Elem prev->next : back link wrong\n"); + } + if (elem->next == NULL) { + if ((elem->parent != NULL) && (elem->parent->last != (xmlNodePtr) elem)) + fprintf(output, "PBM: Elem has no next and not last of list\n"); + } else { + if (elem->next->prev != (xmlNodePtr) elem) + fprintf(output, "PBM: Elem next->prev : forward link wrong\n"); + } +} + +static void +xmlDebugDumpEntityDecl(FILE *output, xmlEntityPtr ent, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (ent == NULL) { + fprintf(output, "Entity declaration is NULL\n"); + return; + } + if (ent->type != XML_ENTITY_DECL) { + fprintf(output, "PBM: not a Entity decl\n"); + return; + } + if (ent->name != NULL) { + fprintf(output, "ENTITYDECL("); + xmlDebugDumpString(output, ent->name); + fprintf(output, ")"); + } else + fprintf(output, "PBM ENTITYDECL noname!!!"); + switch (ent->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + fprintf(output, ", internal\n"); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + fprintf(output, ", external parsed\n"); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + fprintf(output, ", unparsed\n"); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + fprintf(output, ", parameter\n"); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + fprintf(output, ", external parameter\n"); + break; + case XML_INTERNAL_PREDEFINED_ENTITY: + fprintf(output, ", predefined\n"); + break; + } + if (ent->ExternalID) { + fprintf(output, shift); + fprintf(output, " ExternalID=%s\n", ent->ExternalID); + } + if (ent->SystemID) { + fprintf(output, shift); + fprintf(output, " SystemID=%s\n", ent->SystemID); + } + if (ent->URI != NULL) { + 
fprintf(output, shift); + fprintf(output, " URI=%s\n", ent->URI); + } + if (ent->content) { + fprintf(output, shift); + fprintf(output, " content="); + xmlDebugDumpString(output, ent->content); + fprintf(output, "\n"); + } + + /* + * Do a bit of checking + */ + if (ent->parent == NULL) + fprintf(output, "PBM: Ent has no parent\n"); + if (ent->doc == NULL) + fprintf(output, "PBM: Ent has no doc\n"); + if ((ent->parent != NULL) && (ent->doc != ent->parent->doc)) + fprintf(output, "PBM: Ent doc differs from parent's one\n"); + if (ent->prev == NULL) { + if ((ent->parent != NULL) && (ent->parent->children != (xmlNodePtr)ent)) + fprintf(output, "PBM: Ent has no prev and not first of list\n"); + } else { + if (ent->prev->next != (xmlNodePtr) ent) + fprintf(output, "PBM: Ent prev->next : back link wrong\n"); + } + if (ent->next == NULL) { + if ((ent->parent != NULL) && (ent->parent->last != (xmlNodePtr) ent)) + fprintf(output, "PBM: Ent has no next and not last of list\n"); + } else { + if (ent->next->prev != (xmlNodePtr) ent) + fprintf(output, "PBM: Ent next->prev : forward link wrong\n"); + } +} + +static void +xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (ns == NULL) { + fprintf(output, "namespace node is NULL\n"); + return; + } + if (ns->type != XML_NAMESPACE_DECL) { + fprintf(output, "invalid namespace node %d\n", ns->type); + return; + } + if (ns->href == NULL) { + if (ns->prefix != NULL) + fprintf(output, "incomplete namespace %s href=NULL\n", ns->prefix); + else + fprintf(output, "incomplete default namespace href=NULL\n"); + } else { + if (ns->prefix != NULL) + fprintf(output, "namespace %s href=", ns->prefix); + else + fprintf(output, "default namespace href="); + + xmlDebugDumpString(output, ns->href); + fprintf(output, "\n"); + } +} + +static void 
+xmlDebugDumpNamespaceList(FILE *output, xmlNsPtr ns, int depth) { + while (ns != NULL) { + xmlDebugDumpNamespace(output, ns, depth); + ns = ns->next; + } +} + +static void +xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (ent == NULL) { + fprintf(output, "Entity is NULL\n"); + return; + } + switch (ent->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + fprintf(output, "INTERNAL_GENERAL_ENTITY "); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + fprintf(output, "EXTERNAL_GENERAL_PARSED_ENTITY "); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + fprintf(output, "EXTERNAL_GENERAL_UNPARSED_ENTITY "); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + fprintf(output, "INTERNAL_PARAMETER_ENTITY "); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + fprintf(output, "EXTERNAL_PARAMETER_ENTITY "); + break; + default: + fprintf(output, "ENTITY_%d ! ", ent->etype); + } + fprintf(output, "%s\n", ent->name); + if (ent->ExternalID) { + fprintf(output, shift); + fprintf(output, "ExternalID=%s\n", ent->ExternalID); + } + if (ent->SystemID) { + fprintf(output, shift); + fprintf(output, "SystemID=%s\n", ent->SystemID); + } + if (ent->URI) { + fprintf(output, shift); + fprintf(output, "URI=%s\n", ent->URI); + } + if (ent->content) { + fprintf(output, shift); + fprintf(output, "content="); + xmlDebugDumpString(output, ent->content); + fprintf(output, "\n"); + } +} + +/** + * xmlDebugDumpAttr: + * @output: the FILE * for the output + * @attr: the attribute + * @depth: the indentation level. 
+ * + * Dumps debug information for the attribute + */ +void +xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (attr == NULL) { + fprintf(output, "Attr is NULL"); + return; + } + fprintf(output, "ATTRIBUTE "); + xmlDebugDumpString(output, attr->name); + fprintf(output, "\n"); + if (attr->children != NULL) + xmlDebugDumpNodeList(output, attr->children, depth + 1); + + /* + * Do a bit of checking + */ + if (attr->parent == NULL) + fprintf(output, "PBM: Attr has no parent\n"); + if (attr->doc == NULL) + fprintf(output, "PBM: Attr has no doc\n"); + if ((attr->parent != NULL) && (attr->doc != attr->parent->doc)) + fprintf(output, "PBM: Attr doc differs from parent's one\n"); + if (attr->prev == NULL) { + if ((attr->parent != NULL) && (attr->parent->properties != attr)) + fprintf(output, "PBM: Attr has no prev and not first of list\n"); + } else { + if (attr->prev->next != attr) + fprintf(output, "PBM: Attr prev->next : back link wrong\n"); + } + if (attr->next != NULL) { + if (attr->next->prev != attr) + fprintf(output, "PBM: Attr next->prev : forward link wrong\n"); + } +} + +/** + * xmlDebugDumpAttrList: + * @output: the FILE * for the output + * @attr: the attribute list + * @depth: the indentation level. + * + * Dumps debug information for the attribute list + */ +void +xmlDebugDumpAttrList(FILE * output, xmlAttrPtr attr, int depth) +{ + if (output == NULL) + output = stdout; + while (attr != NULL) { + xmlDebugDumpAttr(output, attr, depth); + attr = attr->next; + } +} + +/** + * xmlDebugDumpOneNode: + * @output: the FILE * for the output + * @node: the node + * @depth: the indentation level. 
+ * + * Dumps debug information for the element node, it is not recursive + */ +void +xmlDebugDumpOneNode(FILE * output, xmlNodePtr node, int depth) +{ + int i; + char shift[100]; + + if (output == NULL) + output = stdout; + for (i = 0; ((i < depth) && (i < 25)); i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + if (node == NULL) { + fprintf(output, shift); + fprintf(output, "node is NULL\n"); + return; + } + switch (node->type) { + case XML_ELEMENT_NODE: + fprintf(output, shift); + fprintf(output, "ELEMENT "); + if ((node->ns != NULL) && (node->ns->prefix != NULL)) { + xmlDebugDumpString(output, node->ns->prefix); + fprintf(output, ":"); + } + xmlDebugDumpString(output, node->name); + fprintf(output, "\n"); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, shift); + fprintf(output, "Error, ATTRIBUTE found here\n"); + break; + case XML_TEXT_NODE: + fprintf(output, shift); + if (node->name == (const xmlChar *) xmlStringTextNoenc) + fprintf(output, "TEXT no enc\n"); + else + fprintf(output, "TEXT\n"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, shift); + fprintf(output, "CDATA_SECTION\n"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, shift); + fprintf(output, "ENTITY_REF(%s)\n", node->name); + break; + case XML_ENTITY_NODE: + fprintf(output, shift); + fprintf(output, "ENTITY\n"); + break; + case XML_PI_NODE: + fprintf(output, shift); + fprintf(output, "PI %s\n", node->name); + break; + case XML_COMMENT_NODE: + fprintf(output, shift); + fprintf(output, "COMMENT\n"); + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + fprintf(output, shift); + fprintf(output, "Error, DOCUMENT found here\n"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, shift); + fprintf(output, "DOCUMENT_TYPE\n"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, shift); + fprintf(output, "DOCUMENT_FRAG\n"); + break; + case XML_NOTATION_NODE: + fprintf(output, shift); + fprintf(output, "NOTATION\n"); 
+ break; + case XML_DTD_NODE: + xmlDebugDumpDtdNode(output, (xmlDtdPtr) node, depth); + return; + case XML_ELEMENT_DECL: + xmlDebugDumpElemDecl(output, (xmlElementPtr) node, depth); + return; + case XML_ATTRIBUTE_DECL: + xmlDebugDumpAttrDecl(output, (xmlAttributePtr) node, depth); + return; + case XML_ENTITY_DECL: + xmlDebugDumpEntityDecl(output, (xmlEntityPtr) node, depth); + return; + case XML_NAMESPACE_DECL: + xmlDebugDumpNamespace(output, (xmlNsPtr) node, depth); + return; + case XML_XINCLUDE_START: + fprintf(output, shift); + fprintf(output, "INCLUDE START\n"); + return; + case XML_XINCLUDE_END: + fprintf(output, shift); + fprintf(output, "INCLUDE END\n"); + return; + default: + fprintf(output, shift); + fprintf(output, "NODE_%d !!!\n", node->type); + return; + } + if (node->doc == NULL) { + fprintf(output, shift); + fprintf(output, "doc == NULL !!!\n"); + } + if (node->nsDef != NULL) + xmlDebugDumpNamespaceList(output, node->nsDef, depth + 1); + if (node->properties != NULL) + xmlDebugDumpAttrList(output, node->properties, depth + 1); + if (node->type != XML_ENTITY_REF_NODE) { + if ((node->type != XML_ELEMENT_NODE) && (node->content != NULL)) { + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i + 2] = shift[2 * i + 3] = 0; + fprintf(output, shift); + fprintf(output, "content="); + xmlDebugDumpString(output, node->content); + fprintf(output, "\n"); + } + } else { + xmlEntityPtr ent; + + ent = xmlGetDocEntity(node->doc, node->name); + if (ent != NULL) + xmlDebugDumpEntity(output, ent, depth + 1); + } + /* + * Do a bit of checking + */ + if (node->parent == NULL) + fprintf(output, "PBM: Node has no parent\n"); + if (node->doc == NULL) + fprintf(output, "PBM: Node has no doc\n"); + if ((node->parent != NULL) && (node->doc != node->parent->doc)) + fprintf(output, "PBM: Node doc differs from parent's one\n"); + if (node->prev == NULL) { + if ((node->parent != NULL) && (node->parent->children != node)) + fprintf(output, + "PBM: Node has no prev and not first of 
list\n"); + } else { + if (node->prev->next != node) + fprintf(output, "PBM: Node prev->next : back link wrong\n"); + } + if (node->next == NULL) { + if ((node->parent != NULL) && (node->parent->last != node)) + fprintf(output, + "PBM: Node has no next and not last of list\n"); + } else { + if (node->next->prev != node) + fprintf(output, "PBM: Node next->prev : forward link wrong\n"); + } +} + +/** + * xmlDebugDumpNode: + * @output: the FILE * for the output + * @node: the node + * @depth: the indentation level. + * + * Dumps debug information for the element node, it is recursive + */ +void +xmlDebugDumpNode(FILE * output, xmlNodePtr node, int depth) +{ + if (output == NULL) + output = stdout; + if (node == NULL) { + int i; + char shift[100]; + + for (i = 0; ((i < depth) && (i < 25)); i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + fprintf(output, "node is NULL\n"); + return; + } + xmlDebugDumpOneNode(output, node, depth); + if ((node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) + xmlDebugDumpNodeList(output, node->children, depth + 1); +} + +/** + * xmlDebugDumpNodeList: + * @output: the FILE * for the output + * @node: the node list + * @depth: the indentation level. 
+ * + * Dumps debug information for the list of element node, it is recursive + */ +void +xmlDebugDumpNodeList(FILE * output, xmlNodePtr node, int depth) +{ + if (output == NULL) + output = stdout; + while (node != NULL) { + xmlDebugDumpNode(output, node, depth); + node = node->next; + } +} + + +/** + * xmlDebugDumpDocumentHead: + * @output: the FILE * for the output + * @doc: the document + * + * Dumps debug information cncerning the document, not recursive + */ +void +xmlDebugDumpDocumentHead(FILE * output, xmlDocPtr doc) +{ + if (output == NULL) + output = stdout; + if (doc == NULL) { + fprintf(output, "DOCUMENT == NULL !\n"); + return; + } + + switch (doc->type) { + case XML_ELEMENT_NODE: + fprintf(output, "Error, ELEMENT found here "); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "Error, ATTRIBUTE found here\n"); + break; + case XML_TEXT_NODE: + fprintf(output, "Error, TEXT\n"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, "Error, CDATA_SECTION\n"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "Error, ENTITY_REF\n"); + break; + case XML_ENTITY_NODE: + fprintf(output, "Error, ENTITY\n"); + break; + case XML_PI_NODE: + fprintf(output, "Error, PI\n"); + break; + case XML_COMMENT_NODE: + fprintf(output, "Error, COMMENT\n"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "DOCUMENT\n"); + break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "HTML DOCUMENT\n"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "Error, DOCUMENT_TYPE\n"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "Error, DOCUMENT_FRAG\n"); + break; + case XML_NOTATION_NODE: + fprintf(output, "Error, NOTATION\n"); + break; + default: + fprintf(output, "NODE_%d\n", doc->type); + } + if (doc->name != NULL) { + fprintf(output, "name="); + xmlDebugDumpString(output, BAD_CAST doc->name); + fprintf(output, "\n"); + } + if (doc->version != NULL) { + fprintf(output, "version="); + xmlDebugDumpString(output, doc->version); + fprintf(output, "\n"); 
+ } + if (doc->encoding != NULL) { + fprintf(output, "encoding="); + xmlDebugDumpString(output, doc->encoding); + fprintf(output, "\n"); + } + if (doc->URL != NULL) { + fprintf(output, "URL="); + xmlDebugDumpString(output, doc->URL); + fprintf(output, "\n"); + } + if (doc->standalone) + fprintf(output, "standalone=true\n"); + if (doc->oldNs != NULL) + xmlDebugDumpNamespaceList(output, doc->oldNs, 0); +} + +/** + * xmlDebugDumpDocument: + * @output: the FILE * for the output + * @doc: the document + * + * Dumps debug information for the document, it's recursive + */ +void +xmlDebugDumpDocument(FILE * output, xmlDocPtr doc) +{ + if (output == NULL) + output = stdout; + if (doc == NULL) { + fprintf(output, "DOCUMENT == NULL !\n"); + return; + } + xmlDebugDumpDocumentHead(output, doc); + if (((doc->type == XML_DOCUMENT_NODE) || + (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL)) + xmlDebugDumpNodeList(output, doc->children, 1); +} + +/** + * xmlDebugDumpDTD: + * @output: the FILE * for the output + * @dtd: the DTD + * + * Dumps debug information for the DTD + */ +void +xmlDebugDumpDTD(FILE * output, xmlDtdPtr dtd) +{ + if (output == NULL) + output = stdout; + if (dtd == NULL) { + fprintf(output, "DTD is NULL\n"); + return; + } + if (dtd->type != XML_DTD_NODE) { + fprintf(output, "PBM: not a DTD\n"); + return; + } + if (dtd->name != NULL) + fprintf(output, "DTD(%s)", dtd->name); + else + fprintf(output, "DTD"); + if (dtd->ExternalID != NULL) + fprintf(output, ", PUBLIC %s", dtd->ExternalID); + if (dtd->SystemID != NULL) + fprintf(output, ", SYSTEM %s", dtd->SystemID); + fprintf(output, "\n"); + /* + * Do a bit of checking + */ + if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc)) + fprintf(output, "PBM: DTD doc differs from parent's one\n"); + if (dtd->prev == NULL) { + if ((dtd->parent != NULL) + && (dtd->parent->children != (xmlNodePtr) dtd)) + fprintf(output, + "PBM: DTD has no prev and not first of list\n"); + } else { + if (dtd->prev->next 
!= (xmlNodePtr) dtd) + fprintf(output, "PBM: DTD prev->next : back link wrong\n"); + } + if (dtd->next == NULL) { + if ((dtd->parent != NULL) + && (dtd->parent->last != (xmlNodePtr) dtd)) + fprintf(output, "PBM: DTD has no next and not last of list\n"); + } else { + if (dtd->next->prev != (xmlNodePtr) dtd) + fprintf(output, "PBM: DTD next->prev : forward link wrong\n"); + } + if (dtd->children == NULL) + fprintf(output, " DTD is empty\n"); + else + xmlDebugDumpNodeList(output, dtd->children, 1); +} + +static void +xmlDebugDumpEntityCallback(xmlEntityPtr cur, FILE *output) { + if (cur == NULL) { + fprintf(output, "Entity is NULL"); + return; + } + fprintf(output, "%s : ", cur->name); + switch (cur->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + fprintf(output, "INTERNAL GENERAL, "); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + fprintf(output, "EXTERNAL PARSED, "); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + fprintf(output, "EXTERNAL UNPARSED, "); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + fprintf(output, "INTERNAL PARAMETER, "); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + fprintf(output, "EXTERNAL PARAMETER, "); + break; + default: + fprintf(output, "UNKNOWN TYPE %d", + cur->etype); + } + if (cur->ExternalID != NULL) + fprintf(output, "ID \"%s\"", cur->ExternalID); + if (cur->SystemID != NULL) + fprintf(output, "SYSTEM \"%s\"", cur->SystemID); + if (cur->orig != NULL) + fprintf(output, "\n orig \"%s\"", cur->orig); + if ((cur->type != XML_ELEMENT_NODE) && + (cur->content != NULL)) + fprintf(output, "\n content \"%s\"", cur->content); + fprintf(output, "\n"); +} + +/** + * xmlDebugDumpEntities: + * @output: the FILE * for the output + * @doc: the document + * + * Dumps debug information for all the entities in use by the document + */ +void +xmlDebugDumpEntities(FILE * output, xmlDocPtr doc) +{ + if (output == NULL) + output = stdout; + if (doc == NULL) { + fprintf(output, "DOCUMENT == NULL !\n"); + return; + } + + switch (doc->type) { 
+ case XML_ELEMENT_NODE: + fprintf(output, "Error, ELEMENT found here "); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "Error, ATTRIBUTE found here\n"); + break; + case XML_TEXT_NODE: + fprintf(output, "Error, TEXT\n"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, "Error, CDATA_SECTION\n"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "Error, ENTITY_REF\n"); + break; + case XML_ENTITY_NODE: + fprintf(output, "Error, ENTITY\n"); + break; + case XML_PI_NODE: + fprintf(output, "Error, PI\n"); + break; + case XML_COMMENT_NODE: + fprintf(output, "Error, COMMENT\n"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "DOCUMENT\n"); + break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "HTML DOCUMENT\n"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "Error, DOCUMENT_TYPE\n"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "Error, DOCUMENT_FRAG\n"); + break; + case XML_NOTATION_NODE: + fprintf(output, "Error, NOTATION\n"); + break; + default: + fprintf(output, "NODE_%d\n", doc->type); + } + if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { + xmlEntitiesTablePtr table = (xmlEntitiesTablePtr) + doc->intSubset->entities; + + fprintf(output, "Entities in internal subset\n"); + xmlHashScan(table, (xmlHashScanner) xmlDebugDumpEntityCallback, + output); + } else + fprintf(output, "No entities in internal subset\n"); + if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { + xmlEntitiesTablePtr table = (xmlEntitiesTablePtr) + doc->extSubset->entities; + + fprintf(output, "Entities in external subset\n"); + xmlHashScan(table, (xmlHashScanner) xmlDebugDumpEntityCallback, + output); + } else + fprintf(output, "No entities in external subset\n"); +} + +/** + * xmlLsCountNode: + * @node: the node to count + * + * Count the children of @node. + * + * Returns the number of children of @node. 
+ */ +int +xmlLsCountNode(xmlNodePtr node) { + int ret = 0; + xmlNodePtr list = NULL; + + if (node == NULL) + return(0); + + switch (node->type) { + case XML_ELEMENT_NODE: + list = node->children; + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + list = ((xmlDocPtr) node)->children; + break; + case XML_ATTRIBUTE_NODE: + list = ((xmlAttrPtr) node)->children; + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + if (node->content != NULL) { + ret = xmlStrlen(node->content); + } + break; + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + ret = 1; + break; + } + for (;list != NULL;ret++) + list = list->next; + return(ret); +} + +/** + * xmlLsOneNode: + * @output: the FILE * for the output + * @node: the node to dump + * + * Dump to @output the type and name of @node. 
+ */ +void +xmlLsOneNode(FILE *output, xmlNodePtr node) { + if (node == NULL) { + fprintf(output, "NULL\n"); + return; + } + switch (node->type) { + case XML_ELEMENT_NODE: + fprintf(output, "-"); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "a"); + break; + case XML_TEXT_NODE: + fprintf(output, "t"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, "C"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "e"); + break; + case XML_ENTITY_NODE: + fprintf(output, "E"); + break; + case XML_PI_NODE: + fprintf(output, "p"); + break; + case XML_COMMENT_NODE: + fprintf(output, "c"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "d"); + break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "h"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "T"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "F"); + break; + case XML_NOTATION_NODE: + fprintf(output, "N"); + break; + case XML_NAMESPACE_DECL: + fprintf(output, "n"); + break; + default: + fprintf(output, "?"); + } + if (node->type != XML_NAMESPACE_DECL) { + if (node->properties != NULL) + fprintf(output, "a"); + else + fprintf(output, "-"); + if (node->nsDef != NULL) + fprintf(output, "n"); + else + fprintf(output, "-"); + } + + fprintf(output, " %8d ", xmlLsCountNode(node)); + + switch (node->type) { + case XML_ELEMENT_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_ATTRIBUTE_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_TEXT_NODE: + if (node->content != NULL) { + xmlDebugDumpString(output, node->content); + } + break; + case XML_CDATA_SECTION_NODE: + break; + case XML_ENTITY_REF_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_ENTITY_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_PI_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_COMMENT_NODE: + break; + case 
XML_DOCUMENT_NODE: + break; + case XML_HTML_DOCUMENT_NODE: + break; + case XML_DOCUMENT_TYPE_NODE: + break; + case XML_DOCUMENT_FRAG_NODE: + break; + case XML_NOTATION_NODE: + break; + case XML_NAMESPACE_DECL: { + xmlNsPtr ns = (xmlNsPtr) node; + + if (ns->prefix == NULL) + fprintf(output, "default -> %s", ns->href); + else + fprintf(output, "%s -> %s", ns->prefix, ns->href); + break; + } + default: + if (node->name != NULL) + fprintf(output, "%s", node->name); + } + fprintf(output, "\n"); +} + +/** + * xmlBoolToText: + * @boolval: a bool to turn into text + * + * Convenient way to turn bool into text + * + * Returns a pointer to either "True" or "False" + */ +const char * +xmlBoolToText(int boolval) +{ + if (boolval) + return("True"); + else + return("False"); +} + +/**************************************************************** + * * + * The XML shell related functions * + * * + ****************************************************************/ + + + +/* + * TODO: Improvement/cleanups for the XML shell + * - allow to shell out an editor on a subpart + * - cleanup function registrations (with help) and calling + * - provide registration routines + */ + +/** + * xmlShellPrintXPathError: + * @errorType: valid xpath error id + * @arg: the argument that cause xpath to fail + * + * Print the xpath error to libxml default error channel + */ +void +xmlShellPrintXPathError(int errorType, const char *arg) +{ + const char *default_arg = "Result"; + + if (!arg) + arg = default_arg; + + switch (errorType) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + break; + + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s 
is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", arg); + break; + } + xmlGenericError(xmlGenericErrorContext, + "Try casting the result string function (xpath builtin)\n", + arg); +} + + +/** + * xmlShellPrintNodeCtxt: + * @ctxt : a non-null shell context + * @node : a non-null node to print to the output FILE + * + * Print node to the output FILE + */ +static void +xmlShellPrintNodeCtxt(xmlShellCtxtPtr ctxt,xmlNodePtr node) +{ + FILE *fp; + + if (!node) + return; + if (ctxt == NULL) + fp = stdout; + else + fp = ctxt->output; + + if (node->type == XML_DOCUMENT_NODE) + xmlDocDump(fp, (xmlDocPtr) node); + else if (node->type == XML_ATTRIBUTE_NODE) + xmlDebugDumpAttrList(fp, (xmlAttrPtr) node, 0); + else + xmlElemDump(fp, node->doc, node); + + fprintf(fp, "\n"); +} + +/** + * xmlShellPrintNode: + * @node : a non-null node to print to the output FILE + * + * Print node to the output FILE + */ +void +xmlShellPrintNode(xmlNodePtr node) +{ + xmlShellPrintNodeCtxt(NULL, node); +} + +/** + * xmlShellPrintXPathResultCtxt: + * @ctxt: a valid shell context + * @list: a valid result generated by an xpath evaluation + * + * Prints result to the output FILE + */ +static void +xmlShellPrintXPathResultCtxt(xmlShellCtxtPtr ctxt,xmlXPathObjectPtr list) +{ + int i = 0; + if (!ctxt) + return; + + if (list != NULL) { + switch (list->type) { + case XPATH_NODESET:{ + int indx; + + if (list->nodesetval) { + for (indx = 0; indx < list->nodesetval->nodeNr; + indx++) { + if (i > 0) + fprintf(stderr, " -------\n"); + xmlShellPrintNodeCtxt(ctxt, + list->nodesetval->nodeTab[indx]); + } + } else { + 
xmlGenericError(xmlGenericErrorContext, + "Empty node set\n"); + } + break; + } + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "Is a Boolean:%s\n", + xmlBoolToText(list->boolval)); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "Is a number:%0g\n", list->floatval); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "Is a string:%s\n", list->stringval); + break; + + default: + xmlShellPrintXPathError(list->type, NULL); + } + } +} + +/** + * xmlShellPrintXPathResult: + * @list: a valid result generated by an xpath evaluation + * + * Prints result to the output FILE + */ +void +xmlShellPrintXPathResult(xmlXPathObjectPtr list) +{ + xmlShellPrintXPathResultCtxt(NULL, list); +} + +/** + * xmlShellList: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "ls" + * Does an Unix like listing of the given node (like a directory) + * + * Returns 0 + */ +int +xmlShellList(xmlShellCtxtPtr ctxt, + char *arg ATTRIBUTE_UNUSED, xmlNodePtr node, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlNodePtr cur; + if (!ctxt) + return (0); + if (node == NULL) { + fprintf(ctxt->output, "NULL\n"); + return (0); + } + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + cur = ((xmlDocPtr) node)->children; + } else if (node->type == XML_NAMESPACE_DECL) { + xmlLsOneNode(ctxt->output, node); + return (0); + } else if (node->children != NULL) { + cur = node->children; + } else { + xmlLsOneNode(ctxt->output, node); + return (0); + } + while (cur != NULL) { + xmlLsOneNode(ctxt->output, cur); + cur = cur->next; + } + return (0); +} + +/** + * xmlShellBase: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "base" + * dumps the current XML base of the node + * + * Returns 0 + */ +int +xmlShellBase(xmlShellCtxtPtr ctxt, + char *arg ATTRIBUTE_UNUSED, xmlNodePtr 
node, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlChar *base; + if (!ctxt) + return 0; + if (node == NULL) { + fprintf(ctxt->output, "NULL\n"); + return (0); + } + + base = xmlNodeGetBase(node->doc, node); + + if (base == NULL) { + fprintf(ctxt->output, " No base found !!!\n"); + } else { + fprintf(ctxt->output, "%s\n", base); + xmlFree(base); + } + return (0); +} + +/** + * xmlShellSetBase: + * @ctxt: the shell context + * @arg: the new base + * @node: a node + * @node2: unused + * + * Implements the XML shell function "setbase" + * change the current XML base of the node + * + * Returns 0 + */ +static int +xmlShellSetBase(xmlShellCtxtPtr ctxt ATTRIBUTE_UNUSED, + char *arg ATTRIBUTE_UNUSED, xmlNodePtr node, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlNodeSetBase(node, (xmlChar*) arg); + return (0); +} + +/** + * xmlShellGrep: + * @ctxt: the shell context + * @arg: the string or regular expression to find + * @node: a node + * @node2: unused + * + * Implements the XML shell function "grep" + * dumps informations about the node (namespace, attributes, content). 
+ * + * Returns 0 + */ +static int +xmlShellGrep(xmlShellCtxtPtr ctxt ATTRIBUTE_UNUSED, + char *arg, xmlNodePtr node, xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + if (!ctxt) + return (0); + if (node == NULL) + return (0); + if (arg == NULL) + return (0); +#ifdef LIBXML_REGEXP_ENABLED + if ((xmlStrchr((xmlChar *) arg, '?')) || + (xmlStrchr((xmlChar *) arg, '*')) || + (xmlStrchr((xmlChar *) arg, '.')) || + (xmlStrchr((xmlChar *) arg, '['))) { + } +#endif + while (node != NULL) { + if (node->type == XML_COMMENT_NODE) { + if (xmlStrstr(node->content, (xmlChar *) arg)) { + + fprintf(ctxt->output, "%s : ", xmlGetNodePath(node)); + xmlShellList(ctxt, NULL, node, NULL); + } + } else if (node->type == XML_TEXT_NODE) { + if (xmlStrstr(node->content, (xmlChar *) arg)) { + + fprintf(ctxt->output, "%s : ", xmlGetNodePath(node->parent)); + xmlShellList(ctxt, NULL, node->parent, NULL); + } + } + + /* + * Browse the full subtree, deep first + */ + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + node = ((xmlDocPtr) node)->children; + } else if ((node->children != NULL) + && (node->type != XML_ENTITY_REF_NODE)) { + /* deep first */ + node = node->children; + } else if (node->next != NULL) { + /* then siblings */ + node = node->next; + } else { + /* go up to parents->next if needed */ + while (node != NULL) { + if (node->parent != NULL) { + node = node->parent; + } + if (node->next != NULL) { + node = node->next; + break; + } + if (node->parent == NULL) { + node = NULL; + break; + } + } + } + } + return (0); +} + +/** + * xmlShellDir: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "dir" + * dumps informations about the node (namespace, attributes, content). 
+ * + * Returns 0 + */ +int +xmlShellDir(xmlShellCtxtPtr ctxt ATTRIBUTE_UNUSED, + char *arg ATTRIBUTE_UNUSED, xmlNodePtr node, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + if (!ctxt) + return (0); + if (node == NULL) { + fprintf(ctxt->output, "NULL\n"); + return (0); + } + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + xmlDebugDumpDocumentHead(ctxt->output, (xmlDocPtr) node); + } else if (node->type == XML_ATTRIBUTE_NODE) { + xmlDebugDumpAttr(ctxt->output, (xmlAttrPtr) node, 0); + } else { + xmlDebugDumpOneNode(ctxt->output, node, 0); + } + return (0); +} + +/** + * xmlShellCat: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "cat" + * dumps the serialization node content (XML or HTML). + * + * Returns 0 + */ +int +xmlShellCat(xmlShellCtxtPtr ctxt, char *arg ATTRIBUTE_UNUSED, + xmlNodePtr node, xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + if (!ctxt) + return (0); + if (node == NULL) { + fprintf(ctxt->output, "NULL\n"); + return (0); + } + if (ctxt->doc->type == XML_HTML_DOCUMENT_NODE) { +#ifdef LIBXML_HTML_ENABLED + if (node->type == XML_HTML_DOCUMENT_NODE) + htmlDocDump(ctxt->output, (htmlDocPtr) node); + else + htmlNodeDumpFile(ctxt->output, ctxt->doc, node); +#else + if (node->type == XML_DOCUMENT_NODE) + xmlDocDump(ctxt->output, (xmlDocPtr) node); + else + xmlElemDump(ctxt->output, ctxt->doc, node); +#endif /* LIBXML_HTML_ENABLED */ + } else { + if (node->type == XML_DOCUMENT_NODE) + xmlDocDump(ctxt->output, (xmlDocPtr) node); + else + xmlElemDump(ctxt->output, ctxt->doc, node); + } + fprintf(ctxt->output, "\n"); + return (0); +} + +/** + * xmlShellLoad: + * @ctxt: the shell context + * @filename: the file name + * @node: unused + * @node2: unused + * + * Implements the XML shell function "load" + * loads a new document specified by the filename + * + * Returns 0 or -1 if loading failed + */ +int +xmlShellLoad(xmlShellCtxtPtr ctxt, char *filename, + 
xmlNodePtr node ATTRIBUTE_UNUSED, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlDocPtr doc; + int html = 0; + + if (ctxt->doc != NULL) + html = (ctxt->doc->type == XML_HTML_DOCUMENT_NODE); + + if (html) { +#ifdef LIBXML_HTML_ENABLED + doc = htmlParseFile(filename, NULL); +#else + fprintf(ctxt->output, "HTML support not compiled in\n"); + doc = NULL; +#endif /* LIBXML_HTML_ENABLED */ + } else { + doc = xmlParseFile(filename); + } + if (doc != NULL) { + if (ctxt->loaded == 1) { + xmlFreeDoc(ctxt->doc); + } + ctxt->loaded = 1; +#ifdef LIBXML_XPATH_ENABLED + xmlXPathFreeContext(ctxt->pctxt); +#endif /* LIBXML_XPATH_ENABLED */ + xmlFree(ctxt->filename); + ctxt->doc = doc; + ctxt->node = (xmlNodePtr) doc; +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt = xmlXPathNewContext(doc); +#endif /* LIBXML_XPATH_ENABLED */ + ctxt->filename = (char *) xmlStrdup((xmlChar *) filename); + } else + return (-1); + return (0); +} + +/** + * xmlShellWrite: + * @ctxt: the shell context + * @filename: the file name + * @node: a node in the tree + * @node2: unused + * + * Implements the XML shell function "write" + * Write the current node to the filename, it saves the serialization + * of the subtree under the @node specified + * + * Returns 0 or -1 in case of error + */ +int +xmlShellWrite(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + if (node == NULL) + return (-1); + if ((filename == NULL) || (filename[0] == 0)) { + xmlGenericError(xmlGenericErrorContext, + "Write command requires a filename argument\n"); + return (-1); + } +#ifdef W_OK + if (access((char *) filename, W_OK)) { + xmlGenericError(xmlGenericErrorContext, + "Cannot write to %s\n", filename); + return (-1); + } +#endif + switch (node->type) { + case XML_DOCUMENT_NODE: + if (xmlSaveFile((char *) filename, ctxt->doc) < -1) { + xmlGenericError(xmlGenericErrorContext, + "Failed to write to %s\n", filename); + return (-1); + } + break; + case XML_HTML_DOCUMENT_NODE: +#ifdef 
LIBXML_HTML_ENABLED + if (htmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to write to %s\n", filename); + return (-1); + } +#else + if (xmlSaveFile((char *) filename, ctxt->doc) < -1) { + xmlGenericError(xmlGenericErrorContext, + "Failed to write to %s\n", filename); + return (-1); + } +#endif /* LIBXML_HTML_ENABLED */ + break; + default:{ + FILE *f; + + f = fopen((char *) filename, "w"); + if (f == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to write to %s\n", filename); + return (-1); + } + xmlElemDump(f, ctxt->doc, node); + fclose(f); + } + } + return (0); +} + +/** + * xmlShellSave: + * @ctxt: the shell context + * @filename: the file name (optional) + * @node: unused + * @node2: unused + * + * Implements the XML shell function "save" + * Write the current document to the filename, or it's original name + * + * Returns 0 or -1 in case of error + */ +int +xmlShellSave(xmlShellCtxtPtr ctxt, char *filename, + xmlNodePtr node ATTRIBUTE_UNUSED, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + if (ctxt->doc == NULL) + return (-1); + if ((filename == NULL) || (filename[0] == 0)) + filename = ctxt->filename; +#ifdef W_OK + if (access((char *) filename, W_OK)) { + xmlGenericError(xmlGenericErrorContext, + "Cannot save to %s\n", filename); + return (-1); + } +#endif + switch (ctxt->doc->type) { + case XML_DOCUMENT_NODE: + if (xmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to save to %s\n", filename); + } + break; + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_HTML_ENABLED + if (htmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to save to %s\n", filename); + } +#else + if (xmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to save to %s\n", filename); + } +#endif /* LIBXML_HTML_ENABLED */ + break; + default: + xmlGenericError(xmlGenericErrorContext, + "To 
save to subparts of a document use the 'write' command\n"); + return (-1); + + } + return (0); +} + +/** + * xmlShellValidate: + * @ctxt: the shell context + * @dtd: the DTD URI (optional) + * @node: unused + * @node2: unused + * + * Implements the XML shell function "validate" + * Validate the document, if a DTD path is provided, then the validation + * is done against the given DTD. + * + * Returns 0 or -1 in case of error + */ +int +xmlShellValidate(xmlShellCtxtPtr ctxt, char *dtd, + xmlNodePtr node ATTRIBUTE_UNUSED, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlValidCtxt vctxt; + int res = -1; + + vctxt.userData = stderr; + vctxt.error = (xmlValidityErrorFunc) fprintf; + vctxt.warning = (xmlValidityWarningFunc) fprintf; + + if ((dtd == NULL) || (dtd[0] == 0)) { + res = xmlValidateDocument(&vctxt, ctxt->doc); + } else { + xmlDtdPtr subset; + + subset = xmlParseDTD(NULL, (xmlChar *) dtd); + if (subset != NULL) { + res = xmlValidateDtd(&vctxt, ctxt->doc, subset); + + xmlFreeDtd(subset); + } + } + return (res); +} + +/** + * xmlShellDu: + * @ctxt: the shell context + * @arg: unused + * @tree: a node defining a subtree + * @node2: unused + * + * Implements the XML shell function "du" + * show the structure of the subtree under node @tree + * If @tree is null, the command works on the current node. 
+ * + * Returns 0 or -1 in case of error + */ +int +xmlShellDu(xmlShellCtxtPtr ctxt, + char *arg ATTRIBUTE_UNUSED, xmlNodePtr tree, + xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlNodePtr node; + int indent = 0, i; + + if (!ctxt) + return (-1); + + if (tree == NULL) + return (-1); + node = tree; + while (node != NULL) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + fprintf(ctxt->output, "/\n"); + } else if (node->type == XML_ELEMENT_NODE) { + for (i = 0; i < indent; i++) + fprintf(ctxt->output, " "); + fprintf(ctxt->output, "%s\n", node->name); + } else { + } + + /* + * Browse the full subtree, deep first + */ + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + node = ((xmlDocPtr) node)->children; + } else if ((node->children != NULL) + && (node->type != XML_ENTITY_REF_NODE)) { + /* deep first */ + node = node->children; + indent++; + } else if ((node != tree) && (node->next != NULL)) { + /* then siblings */ + node = node->next; + } else if (node != tree) { + /* go up to parents->next if needed */ + while (node != tree) { + if (node->parent != NULL) { + node = node->parent; + indent--; + } + if ((node != tree) && (node->next != NULL)) { + node = node->next; + break; + } + if (node->parent == NULL) { + node = NULL; + break; + } + if (node == tree) { + node = NULL; + break; + } + } + /* exit condition */ + if (node == tree) + node = NULL; + } else + node = NULL; + } + return (0); +} + +/** + * xmlShellPwd: + * @ctxt: the shell context + * @buffer: the output buffer + * @node: a node + * @node2: unused + * + * Implements the XML shell function "pwd" + * Show the full path from the root to the node, if needed building + * thumblers when similar elements exists at a given ancestor level. + * The output is compatible with XPath commands. 
+ * + * Returns 0 or -1 in case of error + */ +int +xmlShellPwd(xmlShellCtxtPtr ctxt ATTRIBUTE_UNUSED, char *buffer, + xmlNodePtr node, xmlNodePtr node2 ATTRIBUTE_UNUSED) +{ + xmlChar *path; + + if (node == NULL) + return (-1); + + path = xmlGetNodePath(node); + if (path == NULL) + return (-1); + + /* + * This test prevents buffer overflow, because this routine + * is only called by xmlShell, in which the second argument is + * 500 chars long. + * It is a dirty hack before a cleaner solution is found. + * Documentation should mention that the second argument must + * be at least 500 chars long, and could be stripped if too long. + */ + snprintf(buffer, 499, "%s", path); + buffer[499] = '0'; + xmlFree(path); + + return (0); +} + +/** + * xmlShell: + * @doc: the initial document + * @filename: the output buffer + * @input: the line reading function + * @output: the output FILE*, defaults to stdout if NULL + * + * Implements the XML shell + * This allow to load, validate, view, modify and save a document + * using a environment similar to a UNIX commandline. 
+ */ +void +xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input, + FILE * output) +{ + char prompt[500] = "/ > "; + char *cmdline = NULL, *cur; + int nbargs; + char command[100]; + char arg[400]; + int i; + xmlShellCtxtPtr ctxt; + xmlXPathObjectPtr list; + + if (doc == NULL) + return; + if (filename == NULL) + return; + if (input == NULL) + return; + if (output == NULL) + output = stdout; + ctxt = (xmlShellCtxtPtr) xmlMalloc(sizeof(xmlShellCtxt)); + if (ctxt == NULL) + return; + ctxt->loaded = 0; + ctxt->doc = doc; + ctxt->input = input; + ctxt->output = output; + ctxt->filename = (char *) xmlStrdup((xmlChar *) filename); + ctxt->node = (xmlNodePtr) ctxt->doc; + +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt = xmlXPathNewContext(ctxt->doc); + if (ctxt->pctxt == NULL) { + xmlFree(ctxt); + return; + } +#endif /* LIBXML_XPATH_ENABLED */ + while (1) { + if (ctxt->node == (xmlNodePtr) ctxt->doc) + snprintf(prompt, sizeof(prompt), "%s > ", "/"); + else if (ctxt->node->name) + snprintf(prompt, sizeof(prompt), "%s > ", ctxt->node->name); + else + snprintf(prompt, sizeof(prompt), "? 
> "); + prompt[sizeof(prompt) - 1] = 0; + + /* + * Get a new command line + */ + cmdline = ctxt->input(prompt); + if (cmdline == NULL) + break; + + /* + * Parse the command itself + */ + cur = cmdline; + nbargs = 0; + while ((*cur == ' ') || (*cur == '\t')) + cur++; + i = 0; + while ((*cur != ' ') && (*cur != '\t') && + (*cur != '\n') && (*cur != '\r')) { + if (*cur == 0) + break; + command[i++] = *cur++; + } + command[i] = 0; + if (i == 0) + continue; + nbargs++; + + /* + * Parse the argument + */ + while ((*cur == ' ') || (*cur == '\t')) + cur++; + i = 0; + while ((*cur != '\n') && (*cur != '\r') && (*cur != 0)) { + if (*cur == 0) + break; + arg[i++] = *cur++; + } + arg[i] = 0; + if (i != 0) + nbargs++; + + /* + * start interpreting the command + */ + if (!strcmp(command, "exit")) + break; + if (!strcmp(command, "quit")) + break; + if (!strcmp(command, "bye")) + break; + if (!strcmp(command, "help")) { + fprintf(ctxt->output, "\tbase display XML base of the node\n"); + fprintf(ctxt->output, "\tsetbase URI change the XML base of the node\n"); + fprintf(ctxt->output, "\tbye leave shell\n"); + fprintf(ctxt->output, "\tcat [node] display node or current node\n"); + fprintf(ctxt->output, "\tcd [path] change directory to path or to root\n"); + fprintf(ctxt->output, "\tdir [path] dumps informations about the node (namespace, attributes, content)\n"); + fprintf(ctxt->output, "\tdu [path] show the structure of the subtree under path or the current node\n"); + fprintf(ctxt->output, "\texit leave shell\n"); + fprintf(ctxt->output, "\thelp display this help\n"); + fprintf(ctxt->output, "\tfree display memory usage\n"); + fprintf(ctxt->output, "\tload [name] load a new document with name\n"); + fprintf(ctxt->output, "\tls [path] list contents of path or the current directory\n"); +#ifdef LIBXML_XPATH_ENABLED + fprintf(ctxt->output, "\txpath expr evaluate the XPath expression in that context and print the result\n"); +#endif /* LIBXML_XPATH_ENABLED */ + fprintf(ctxt->output, 
"\tpwd display current working directory\n"); + fprintf(ctxt->output, "\tquit leave shell\n"); + fprintf(ctxt->output, "\tsave [name] save this document to name or the original name\n"); + fprintf(ctxt->output, "\tvalidate check the document for errors\n"); + fprintf(ctxt->output, "\twrite [name] write the current node to the filename\n"); + fprintf(ctxt->output, "\tgrep string search for a string in the subtree\n"); + } else if (!strcmp(command, "validate")) { + xmlShellValidate(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "load")) { + xmlShellLoad(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "save")) { + xmlShellSave(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "write")) { + xmlShellWrite(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "grep")) { + xmlShellGrep(ctxt, arg, ctxt->node, NULL); + } else if (!strcmp(command, "free")) { + if (arg[0] == 0) { + xmlMemShow(ctxt->output, 0); + } else { + int len = 0; + + sscanf(arg, "%d", &len); + xmlMemShow(ctxt->output, len); + } + } else if (!strcmp(command, "pwd")) { + char dir[500]; + + if (!xmlShellPwd(ctxt, dir, ctxt->node, NULL)) + fprintf(ctxt->output, "%s\n", dir); + } else if (!strcmp(command, "du")) { + xmlShellDu(ctxt, NULL, ctxt->node, NULL); + } else if (!strcmp(command, "base")) { + xmlShellBase(ctxt, NULL, ctxt->node, NULL); +#ifdef LIBXML_XPATH_ENABLED + } else if (!strcmp(command, "xpath")) { + if (arg[0] == 0) { + xmlGenericError(xmlGenericErrorContext, + "xpath: expression required\n"); + } else { + ctxt->pctxt->node = ctxt->node; + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); + xmlXPathDebugDumpObject(ctxt->output, list, 0); + xmlXPathFreeObject(list); + } +#endif /* LIBXML_XPATH_ENABLED */ + } else if (!strcmp(command, "setbase")) { + xmlShellSetBase(ctxt, arg, ctxt->node, NULL); + } else if ((!strcmp(command, "ls")) || (!strcmp(command, "dir"))) { + int dir = (!strcmp(command, "dir")); + + if (arg[0] == 0) { + if (dir) + xmlShellDir(ctxt, NULL, ctxt->node, 
NULL); + else + xmlShellList(ctxt, NULL, ctxt->node, NULL); + } else { + ctxt->pctxt->node = ctxt->node; +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt->node = ctxt->node; + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); +#else + list = NULL; +#endif /* LIBXML_XPATH_ENABLED */ + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + break; + case XPATH_NODESET:{ + int indx; + + if (list->nodesetval == NULL) + break; + + for (indx = 0; + indx < list->nodesetval->nodeNr; + indx++) { + if (dir) + xmlShellDir(ctxt, NULL, + list->nodesetval-> + nodeTab[indx], NULL); + else + xmlShellList(ctxt, NULL, + list->nodesetval-> + nodeTab[indx], NULL); + } + break; + } + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", + arg); + break; + } +#ifdef LIBXML_XPATH_ENABLED + xmlXPathFreeObject(list); +#endif + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + } + ctxt->pctxt->node = NULL; + } + } else if (!strcmp(command, "cd")) { + if (arg[0] == 0) { + ctxt->node = (xmlNodePtr) ctxt->doc; + } else { +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt->node = ctxt->node; + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); +#else + list 
= NULL; +#endif /* LIBXML_XPATH_ENABLED */ + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + break; + case XPATH_NODESET: + if (list->nodesetval != NULL) { + if (list->nodesetval->nodeNr == 1) { + ctxt->node = list->nodesetval->nodeTab[0]; + } else + xmlGenericError(xmlGenericErrorContext, + "%s is a %d Node Set\n", + arg, + list->nodesetval->nodeNr); + } else + xmlGenericError(xmlGenericErrorContext, + "%s is an empty Node Set\n", + arg); + break; + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", + arg); + break; + } +#ifdef LIBXML_XPATH_ENABLED + xmlXPathFreeObject(list); +#endif + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + } + ctxt->pctxt->node = NULL; + } + } else if (!strcmp(command, "cat")) { + if (arg[0] == 0) { + xmlShellCat(ctxt, NULL, ctxt->node, NULL); + } else { + ctxt->pctxt->node = ctxt->node; +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt->node = ctxt->node; + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); +#else + list = NULL; +#endif /* LIBXML_XPATH_ENABLED */ + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: 
no such node\n", arg); + break; + case XPATH_NODESET:{ + int indx; + + if (list->nodesetval == NULL) + break; + + for (indx = 0; + indx < list->nodesetval->nodeNr; + indx++) { + if (i > 0) + fprintf(ctxt->output, " -------\n"); + xmlShellCat(ctxt, NULL, + list->nodesetval-> + nodeTab[indx], NULL); + } + break; + } + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", + arg); + break; + } +#ifdef LIBXML_XPATH_ENABLED + xmlXPathFreeObject(list); +#endif + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + } + ctxt->pctxt->node = NULL; + } + } else { + xmlGenericError(xmlGenericErrorContext, + "Unknown command %s\n", command); + } + free(cmdline); /* not xmlFree here ! */ + } +#ifdef LIBXML_XPATH_ENABLED + xmlXPathFreeContext(ctxt->pctxt); +#endif /* LIBXML_XPATH_ENABLED */ + if (ctxt->loaded) { + xmlFreeDoc(ctxt->doc); + } + if (ctxt->filename != NULL) + xmlFree(ctxt->filename); + xmlFree(ctxt); + if (cmdline != NULL) + free(cmdline); /* not xmlFree here ! 
*/ +} + +#endif /* LIBXML_DEBUG_ENABLED */ diff --git a/bundle/libxml/encoding.c b/bundle/libxml/encoding.c new file mode 100644 index 0000000000..69d67cd6b9 --- /dev/null +++ b/bundle/libxml/encoding.c @@ -0,0 +1,2340 @@ +/* + * encoding.c : implements the encoding conversion functions needed for XML + * + * Related specs: + * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies + * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. + * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + * + * UTF8 string routines from: + * "William M. Brack" <wbrack@mmm.com.hk> + * + * Original code for IsoLatin1 and UTF-16 by "Martin J. 
Duerst" <duerst@w3.org> + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef LIBXML_ICONV_ENABLED +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#endif +#include <libxml/encoding.h> +#include <libxml/xmlmemory.h> +#ifdef LIBXML_HTML_ENABLED +#include <libxml/HTMLparser.h> +#endif +#include <libxml/globals.h> +#include <libxml/xmlerror.h> + +static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; +static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; + +typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; +typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; +struct _xmlCharEncodingAlias { + const char *name; + const char *alias; +}; + +static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; +static int xmlCharEncodingAliasesNb = 0; +static int xmlCharEncodingAliasesMax = 0; + +#ifdef LIBXML_ICONV_ENABLED +#if 0 +#define DEBUG_ENCODING /* Define this to get encoding traces */ +#endif +#endif + +static int xmlLittleEndian = 1; + +/************************************************************************ + * * + * Generic UTF8 handling routines * + * * + * From rfc2044: encoding of the Unicode values on UTF-8: * + * * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) * + * 0000 0000-0000 007F 0xxxxxxx * + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * + * * + * I hope we won't use values > 0xFFFF anytime soon ! * + * * + ************************************************************************/ + +/** + * xmlUTF8Strlen: + * @utf: a sequence of UTF-8 encoded bytes + * + * compute the length of an UTF8 string, it doesn't do a full UTF8 + * checking of the content of the string. 
+ * + * Returns the number of characters in the string or -1 in case of error + */ +int +xmlUTF8Strlen(const xmlChar *utf) { + int ret = 0; + + if (utf == NULL) + return(-1); + + while (*utf != 0) { + if (utf[0] & 0x80) { + if ((utf[1] & 0xc0) != 0x80) + return(-1); + if ((utf[0] & 0xe0) == 0xe0) { + if ((utf[2] & 0xc0) != 0x80) + return(-1); + if ((utf[0] & 0xf0) == 0xf0) { + if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) + return(-1); + utf += 4; + } else { + utf += 3; + } + } else { + utf += 2; + } + } else { + utf++; + } + ret++; + } + return(ret); +} + +/** + * xmlGetUTF8Char: + * @utf: a sequence of UTF-8 encoded bytes + * @len: a pointer to @bytes len + * + * Read one UTF8 Char from @utf + * + * Returns the char value or -1 in case of error and update @len with the + * number of bytes used + */ +int +xmlGetUTF8Char(const unsigned char *utf, int *len) { + unsigned int c; + + if (utf == NULL) + goto error; + if (len == NULL) + goto error; + if (*len < 1) + goto error; + + c = utf[0]; + if (c & 0x80) { + if (*len < 2) + goto error; + if ((utf[1] & 0xc0) != 0x80) + goto error; + if ((c & 0xe0) == 0xe0) { + if (*len < 3) + goto error; + if ((utf[2] & 0xc0) != 0x80) + goto error; + if ((c & 0xf0) == 0xf0) { + if (*len < 4) + goto error; + if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) + goto error; + *len = 4; + /* 4-byte code */ + c = (utf[0] & 0x7) << 18; + c |= (utf[1] & 0x3f) << 12; + c |= (utf[2] & 0x3f) << 6; + c |= utf[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + c = (utf[0] & 0xf) << 12; + c |= (utf[1] & 0x3f) << 6; + c |= utf[2] & 0x3f; + } + } else { + /* 2-byte code */ + *len = 2; + c = (utf[0] & 0x1f) << 6; + c |= utf[1] & 0x3f; + } + } else { + /* 1-byte code */ + *len = 1; + } + return(c); + +error: + *len = 0; + return(-1); +} + +/** + * xmlCheckUTF8: + * @utf: Pointer to putative utf-8 encoded string. + * + * Checks @utf for being valid utf-8. @utf is assumed to be + * null-terminated. 
This function is not super-strict, as it will + * allow longer utf-8 sequences than necessary. Note that Java is + * capable of producing these sequences if provoked. Also note, this + * routine checks for the 4-byte maximum size, but does not check for + * 0x10ffff maximum value. + * + * Return value: true if @utf is valid. + **/ +int +xmlCheckUTF8(const unsigned char *utf) +{ + int ix; + unsigned char c; + + for (ix = 0; (c = utf[ix]);) { + if (c & 0x80) { + if ((utf[ix + 1] & 0xc0) != 0x80) + return(0); + if ((c & 0xe0) == 0xe0) { + if ((utf[ix + 2] & 0xc0) != 0x80) + return(0); + if ((c & 0xf0) == 0xf0) { + if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80) + return(0); + ix += 4; + /* 4-byte code */ + } else + /* 3-byte code */ + ix += 3; + } else + /* 2-byte code */ + ix += 2; + } else + /* 1-byte code */ + ix++; + } + return(1); +} + +/** + * xmlUTF8Strsize: + * @utf: a sequence of UTF-8 encoded bytes + * @len: the number of characters in the array + * + * storage size of an UTF8 string + * + * Returns the storage size of + * the first 'len' characters of ARRAY + * + */ + +int +xmlUTF8Strsize(const xmlChar *utf, int len) { + const xmlChar *ptr=utf; + xmlChar ch; + + if (len <= 0) + return(0); + + while ( len-- > 0) { + if ( !*ptr ) + break; + if ( (ch = *ptr++) & 0x80) + while ( (ch<<=1) & 0x80 ) + ptr++; + } + return (ptr - utf); +} + + +/** + * xmlUTF8Strndup: + * @utf: the input UTF8 * + * @len: the len of @utf (in chars) + * + * a strndup for array of UTF8's + * + * Returns a new UTF8 * or NULL + */ +xmlChar * +xmlUTF8Strndup(const xmlChar *utf, int len) { + xmlChar *ret; + int i; + + if ((utf == NULL) || (len < 0)) return(NULL); + i = xmlUTF8Strsize(utf, len); + ret = (xmlChar *) xmlMalloc((i + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %ld byte failed\n", + (len + 1) * (long)sizeof(xmlChar)); + return(NULL); + } + memcpy(ret, utf, i * sizeof(xmlChar)); + ret[i] = 0; + return(ret); +} + +/** + 
* xmlUTF8Strpos: + * @utf: the input UTF8 * + * @pos: the position of the desired UTF8 char (in chars) + * + * a function to provide the equivalent of fetching a + * character from a string array + * + * Returns a pointer to the UTF8 character or NULL + */ +xmlChar * +xmlUTF8Strpos(const xmlChar *utf, int pos) { + xmlChar ch; + + if (utf == NULL) return(NULL); + if ( (pos < 0) || (pos >= xmlUTF8Strlen(utf)) ) + return(NULL); + while (pos--) { + if ((ch=*utf++) == 0) return(NULL); + if ( ch & 0x80 ) { + /* if not simple ascii, verify proper format */ + if ( (ch & 0xc0) != 0xc0 ) + return(NULL); + /* then skip over remaining bytes for this char */ + while ( (ch <<= 1) & 0x80 ) + if ( (*utf++ & 0xc0) != 0x80 ) + return(NULL); + } + } + return((xmlChar *)utf); +} + +/** + * xmlUTF8Strloc: + * @utf: the input UTF8 * + * @utfchar: the UTF8 character to be found + * + * a function to provide relative location of a UTF8 char + * + * Returns the relative character position of the desired char + * or -1 if not found + */ +int +xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) { + int i, size; + xmlChar ch; + + if (utf==NULL || utfchar==NULL) return -1; + size = xmlUTF8Strsize(utfchar, 1); + for(i=0; (ch=*utf) != 0; i++) { + if (xmlStrncmp(utf, utfchar, size)==0) + return(i); + utf++; + if ( ch & 0x80 ) { + /* if not simple ascii, verify proper format */ + if ( (ch & 0xc0) != 0xc0 ) + return(-1); + /* then skip over remaining bytes for this char */ + while ( (ch <<= 1) & 0x80 ) + if ( (*utf++ & 0xc0) != 0x80 ) + return(-1); + } + } + + return(-1); +} +/** + * xmlUTF8Strsub: + * @utf: a sequence of UTF-8 encoded bytes + * @start: relative pos of first char + * @len: total number to copy + * + * Note: positions are given in units of UTF-8 chars + * + * Returns a pointer to a newly created string + * or NULL if any problem + */ + +xmlChar * +xmlUTF8Strsub(const xmlChar *utf, int start, int len) { + int i; + xmlChar ch; + + if (utf == NULL) return(NULL); + if (start < 0) 
return(NULL); + if (len < 0) return(NULL); + + /* + * Skip over any leading chars + */ + for (i = 0;i < start;i++) { + if ((ch=*utf++) == 0) return(NULL); + if ( ch & 0x80 ) { + /* if not simple ascii, verify proper format */ + if ( (ch & 0xc0) != 0xc0 ) + return(NULL); + /* then skip over remaining bytes for this char */ + while ( (ch <<= 1) & 0x80 ) + if ( (*utf++ & 0xc0) != 0x80 ) + return(NULL); + } + } + + return(xmlUTF8Strndup(utf, len)); +} + +/************************************************************************ + * * + * Conversions To/From UTF8 encoding * + * * + ************************************************************************/ + +/** + * asciiToUTF8: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of ASCII chars + * @inlen: the length of @in + * + * Take a block of ASCII chars in and try to convert it to an UTF-8 + * block of chars out. + * Returns 0 if success, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of ocetes consumed. 
+ */ +static int +asciiToUTF8(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen) { + unsigned char* outstart = out; + const unsigned char* base = in; + const unsigned char* processed = in; + unsigned char* outend = out + *outlen; + const unsigned char* inend; + unsigned int c; + int bits; + + inend = in + (*inlen); + while ((in < inend) && (out - outstart + 5 < *outlen)) { + c= *in++; + + /* assertion: c is a single UTF-4 value */ + if (out >= outend) + break; + if (c < 0x80) { *out++= c; bits= -6; } + else { + *outlen = out - outstart; + *inlen = processed - base; + return(-1); + } + + for ( ; bits >= 0; bits-= 6) { + if (out >= outend) + break; + *out++= ((c >> bits) & 0x3F) | 0x80; + } + processed = (const unsigned char*) in; + } + *outlen = out - outstart; + *inlen = processed - base; + return(0); +} + +/** + * UTF8Toascii: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of ocetes consumed. 
+ */ +static int +UTF8Toascii(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen) { + const unsigned char* processed = in; + const unsigned char* outend; + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend; + unsigned int c, d; + int trailing; + + if (in == NULL) { + /* + * initialization nothing to do + */ + *outlen = 0; + *inlen = 0; + return(0); + } + inend = in + (*inlen); + outend = out + (*outlen); + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) + break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x80) { + if (out >= outend) + break; + *out++ = c; + } else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + +/** + * isolat1ToUTF8: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of ISO Latin 1 chars + * @inlen: the length of @in + * + * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 + * block of chars out. + * Returns 0 if success, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. 
+ * The value of @outlen after return is the number of ocetes consumed. + */ +int +isolat1ToUTF8(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen) { + unsigned char* outstart = out; + const unsigned char* base = in; + unsigned char* outend = out + *outlen; + const unsigned char* inend; + const unsigned char* instop; + xmlChar c = *in; + + inend = in + (*inlen); + instop = inend; + + while (in < inend && out < outend - 1) { + if (c >= 0x80) { + *out++= ((c >> 6) & 0x1F) | 0xC0; + *out++= (c & 0x3F) | 0x80; + ++in; + c = *in; + } + if (instop - in > outend - out) instop = in + (outend - out); + while (c < 0x80 && in < instop) { + *out++ = c; + ++in; + c = *in; + } + } + if (in < inend && out < outend && c < 0x80) { + *out++ = c; + ++in; + } + *outlen = out - outstart; + *inlen = in - base; + return(0); +} + + +/** + * UTF8Toisolat1: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 + * block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of ocetes consumed. 
+ */ +int +UTF8Toisolat1(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen) { + const unsigned char* processed = in; + const unsigned char* outend; + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend; + unsigned int c, d; + int trailing; + + if (in == NULL) { + /* + * initialization nothing to do + */ + *outlen = 0; + *inlen = 0; + return(0); + } + inend = in + (*inlen); + outend = out + (*outlen); + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in IsoLat1 */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if (in >= inend) + break; + if (((d= *in++) & 0xC0) != 0x80) { + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c <= 0xFF) { + if (out >= outend) + break; + *out++ = c; + } else { + /* no chance for this in IsoLat1 */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + +/** + * UTF16LEToUTF8: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @inb: a pointer to an array of UTF-16LE passwd as a byte array + * @inlenb: the length of @in in UTF-16LE chars + * + * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 + * block of chars out. 
This function assume the endian property + * is the same between the native type of this machine and the + * inputed one. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding fails (for *in is not valid utf16 string) + * The value of *inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + */ +static int +UTF16LEToUTF8(unsigned char* out, int *outlen, + const unsigned char* inb, int *inlenb) +{ + unsigned char* outstart = out; + const unsigned char* processed = inb; + unsigned char* outend = out + *outlen; + unsigned short* in = (unsigned short*) inb; + unsigned short* inend; + unsigned int c, d, inlen; + unsigned char *tmp; + int bits; + + if ((*inlenb % 2) == 1) + (*inlenb)--; + inlen = *inlenb / 2; + inend = in + inlen; + while ((in < inend) && (out - outstart + 5 < *outlen)) { + if (xmlLittleEndian) { + c= *in++; + } else { + tmp = (unsigned char *) in; + c = *tmp++; + c = c | (((unsigned int)*tmp) << 8); + in++; + } + if ((c & 0xFC00) == 0xD800) { /* surrogates */ + if (in >= inend) { /* (in > inend) shouldn't happens */ + break; + } + if (xmlLittleEndian) { + d = *in++; + } else { + tmp = (unsigned char *) in; + d = *tmp++; + d = d | (((unsigned int)*tmp) << 8); + in++; + } + if ((d & 0xFC00) == 0xDC00) { + c &= 0x03FF; + c <<= 10; + c |= d & 0x03FF; + c += 0x10000; + } + else { + *outlen = out - outstart; + *inlenb = processed - inb; + return(-2); + } + } + + /* assertion: c is a single UTF-4 value */ + if (out >= outend) + break; + if (c < 0x80) { *out++= c; bits= -6; } + else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + if (out >= outend) + break; + *out++= ((c >> bits) & 0x3F) | 0x80; + } + processed = (const unsigned char*) in; + } + *outlen = out - outstart; + *inlenb = 
processed - inb; + return(0); +} + +/** + * UTF8ToUTF16LE: + * @outb: a pointer to an array of bytes to store the result + * @outlen: the length of @outb + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE + * block of chars out. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + */ +static int +UTF8ToUTF16LE(unsigned char* outb, int *outlen, + const unsigned char* in, int *inlen) +{ + unsigned short* out = (unsigned short*) outb; + const unsigned char* processed = in; + unsigned short* outstart= out; + unsigned short* outend; + const unsigned char* inend= in+*inlen; + unsigned int c, d; + int trailing; + unsigned char *tmp; + unsigned short tmp1, tmp2; + + if (in == NULL) { + /* + * initialization, add the Byte Order Mark + */ + if (*outlen >= 2) { + outb[0] = 0xFF; + outb[1] = 0xFE; + *outlen = 2; + *inlen = 0; +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Added FFFE Byte Order Mark\n"); +#endif + return(2); + } + *outlen = 0; + *inlen = 0; + return(0); + } + outend = out + (*outlen / 2); + while (in < inend) { + d= *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = (out - outstart) * 2; + *inlen = processed - in; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in UTF-16 */ + *outlen = (out - outstart) * 2; + *inlen = processed - in; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) + break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x10000) { + if (out >= outend) + break; + if (xmlLittleEndian) { + *out++ = c; + } 
else { + tmp = (unsigned char *) out; + *tmp = c ; + *(tmp + 1) = c >> 8 ; + out++; + } + } + else if (c < 0x110000) { + if (out+1 >= outend) + break; + c -= 0x10000; + if (xmlLittleEndian) { + *out++ = 0xD800 | (c >> 10); + *out++ = 0xDC00 | (c & 0x03FF); + } else { + tmp1 = 0xD800 | (c >> 10); + tmp = (unsigned char *) out; + *tmp = (unsigned char) tmp1; + *(tmp + 1) = tmp1 >> 8; + out++; + + tmp2 = 0xDC00 | (c & 0x03FF); + tmp = (unsigned char *) out; + *tmp = (unsigned char) tmp2; + *(tmp + 1) = tmp2 >> 8; + out++; + } + } + else + break; + processed = in; + } + *outlen = (out - outstart) * 2; + *inlen = processed - in; + return(0); +} + +/** + * UTF16BEToUTF8: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @inb: a pointer to an array of UTF-16 passwd as a byte array + * @inlenb: the length of @in in UTF-16 chars + * + * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 + * block of chars out. This function assume the endian property + * is the same between the native type of this machine and the + * inputed one. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding fails (for *in is not valid utf16 string) + * The value of *inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. 
+ */ +static int +UTF16BEToUTF8(unsigned char* out, int *outlen, + const unsigned char* inb, int *inlenb) +{ + unsigned char* outstart = out; + const unsigned char* processed = inb; + unsigned char* outend = out + *outlen; + unsigned short* in = (unsigned short*) inb; + unsigned short* inend; + unsigned int c, d, inlen; + unsigned char *tmp; + int bits; + + if ((*inlenb % 2) == 1) + (*inlenb)--; + inlen = *inlenb / 2; + inend= in + inlen; + while (in < inend) { + if (xmlLittleEndian) { + tmp = (unsigned char *) in; + c = *tmp++; + c = c << 8; + c = c | (unsigned int) *tmp; + in++; + } else { + c= *in++; + } + if ((c & 0xFC00) == 0xD800) { /* surrogates */ + if (in >= inend) { /* (in > inend) shouldn't happens */ + *outlen = out - outstart; + *inlenb = processed - inb; + return(-2); + } + if (xmlLittleEndian) { + tmp = (unsigned char *) in; + d = *tmp++; + d = d << 8; + d = d | (unsigned int) *tmp; + in++; + } else { + d= *in++; + } + if ((d & 0xFC00) == 0xDC00) { + c &= 0x03FF; + c <<= 10; + c |= d & 0x03FF; + c += 0x10000; + } + else { + *outlen = out - outstart; + *inlenb = processed - inb; + return(-2); + } + } + + /* assertion: c is a single UTF-4 value */ + if (out >= outend) + break; + if (c < 0x80) { *out++= c; bits= -6; } + else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { + if (out >= outend) + break; + *out++= ((c >> bits) & 0x3F) | 0x80; + } + processed = (const unsigned char*) in; + } + *outlen = out - outstart; + *inlenb = processed - inb; + return(0); +} + +/** + * UTF8ToUTF16BE: + * @outb: a pointer to an array of bytes to store the result + * @outlen: the length of @outb + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE + * block of chars out. 
+ * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + */ +static int +UTF8ToUTF16BE(unsigned char* outb, int *outlen, + const unsigned char* in, int *inlen) +{ + unsigned short* out = (unsigned short*) outb; + const unsigned char* processed = in; + unsigned short* outstart= out; + unsigned short* outend; + const unsigned char* inend= in+*inlen; + unsigned int c, d; + int trailing; + unsigned char *tmp; + unsigned short tmp1, tmp2; + + if (in == NULL) { + /* + * initialization, add the Byte Order Mark + */ + if (*outlen >= 2) { + outb[0] = 0xFE; + outb[1] = 0xFF; + *outlen = 2; + *inlen = 0; +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Added FEFF Byte Order Mark\n"); +#endif + return(2); + } + *outlen = 0; + *inlen = 0; + return(0); + } + outend = out + (*outlen / 2); + while (in < inend) { + d= *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - in; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in UTF-16 */ + *outlen = out - outstart; + *inlen = processed - in; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x10000) { + if (out >= outend) break; + if (xmlLittleEndian) { + tmp = (unsigned char *) out; + *tmp = c >> 8; + *(tmp + 1) = c; + out++; + } else { + *out++ = c; + } + } + else if (c < 0x110000) { + if (out+1 >= outend) break; + c -= 0x10000; + if (xmlLittleEndian) { + tmp1 = 0xD800 | (c >> 10); + tmp = (unsigned char *) out; + *tmp = tmp1 >> 8; + *(tmp + 1) = (unsigned char) tmp1; + out++; + + tmp2 = 0xDC00 | (c & 0x03FF); + 
tmp = (unsigned char *) out; + *tmp = tmp2 >> 8; + *(tmp + 1) = (unsigned char) tmp2; + out++; + } else { + *out++ = 0xD800 | (c >> 10); + *out++ = 0xDC00 | (c & 0x03FF); + } + } + else + break; + processed = in; + } + *outlen = (out - outstart) * 2; + *inlen = processed - in; + return(0); +} + +/************************************************************************ + * * + * Generic encoding handling routines * + * * + ************************************************************************/ + +/** + * xmlDetectCharEncoding: + * @in: a pointer to the first bytes of the XML entity, must be at least + * 4 bytes long. + * @len: pointer to the length of the buffer + * + * Guess the encoding of the entity using the first bytes of the entity content + * accordingly of the non-normative appendix F of the XML-1.0 recommendation. + * + * Returns one of the XML_CHAR_ENCODING_... values. + */ +xmlCharEncoding +xmlDetectCharEncoding(const unsigned char* in, int len) +{ + if (len >= 4) { + if ((in[0] == 0x00) && (in[1] == 0x00) && + (in[2] == 0x00) && (in[3] == 0x3C)) + return(XML_CHAR_ENCODING_UCS4BE); + if ((in[0] == 0x3C) && (in[1] == 0x00) && + (in[2] == 0x00) && (in[3] == 0x00)) + return(XML_CHAR_ENCODING_UCS4LE); + if ((in[0] == 0x00) && (in[1] == 0x00) && + (in[2] == 0x3C) && (in[3] == 0x00)) + return(XML_CHAR_ENCODING_UCS4_2143); + if ((in[0] == 0x00) && (in[1] == 0x3C) && + (in[2] == 0x00) && (in[3] == 0x00)) + return(XML_CHAR_ENCODING_UCS4_3412); + if ((in[0] == 0x4C) && (in[1] == 0x6F) && + (in[2] == 0xA7) && (in[3] == 0x94)) + return(XML_CHAR_ENCODING_EBCDIC); + if ((in[0] == 0x3C) && (in[1] == 0x3F) && + (in[2] == 0x78) && (in[3] == 0x6D)) + return(XML_CHAR_ENCODING_UTF8); + } + if (len >= 3) { + /* + * Errata on XML-1.0 June 20 2001 + * We now allow an UTF8 encoded BOM + */ + if ((in[0] == 0xEF) && (in[1] == 0xBB) && + (in[2] == 0xBF)) + return(XML_CHAR_ENCODING_UTF8); + } + if (len >= 2) { + if ((in[0] == 0xFE) && (in[1] == 0xFF)) + 
return(XML_CHAR_ENCODING_UTF16BE); + if ((in[0] == 0xFF) && (in[1] == 0xFE)) + return(XML_CHAR_ENCODING_UTF16LE); + } + return(XML_CHAR_ENCODING_NONE); +} + +/** + * xmlCleanupEncodingAliases: + * + * Unregisters all aliases + */ +void +xmlCleanupEncodingAliases(void) { + int i; + + if (xmlCharEncodingAliases == NULL) + return; + + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (xmlCharEncodingAliases[i].name != NULL) + xmlFree((char *) xmlCharEncodingAliases[i].name); + if (xmlCharEncodingAliases[i].alias != NULL) + xmlFree((char *) xmlCharEncodingAliases[i].alias); + } + xmlCharEncodingAliasesNb = 0; + xmlCharEncodingAliasesMax = 0; + xmlFree(xmlCharEncodingAliases); + xmlCharEncodingAliases = NULL; +} + +/** + * xmlGetEncodingAlias: + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Lookup an encoding name for the given alias. + * + * Returns NULL if not found the original name otherwise + */ +const char * +xmlGetEncodingAlias(const char *alias) { + int i; + char upper[100]; + + if (alias == NULL) + return(NULL); + + if (xmlCharEncodingAliases == NULL) + return(NULL); + + for (i = 0;i < 99;i++) { + upper[i] = toupper(alias[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { + return(xmlCharEncodingAliases[i].name); + } + } + return(NULL); +} + +/** + * xmlAddEncodingAlias: + * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Registers and alias @alias for an encoding named @name. Existing alias + * will be overwritten. 
+ * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlAddEncodingAlias(const char *name, const char *alias) { + int i; + char upper[100]; + + if ((name == NULL) || (alias == NULL)) + return(-1); + + for (i = 0;i < 99;i++) { + upper[i] = toupper(alias[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + if (xmlCharEncodingAliases == NULL) { + xmlCharEncodingAliasesNb = 0; + xmlCharEncodingAliasesMax = 20; + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); + if (xmlCharEncodingAliases == NULL) + return(-1); + } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { + xmlCharEncodingAliasesMax *= 2; + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlRealloc(xmlCharEncodingAliases, + xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); + } + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { + /* + * Replace the definition. 
+ */ + xmlFree((char *) xmlCharEncodingAliases[i].name); + xmlCharEncodingAliases[i].name = xmlMemStrdup(name); + return(0); + } + } + /* + * Add the definition + */ + xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); + xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); + xmlCharEncodingAliasesNb++; + return(0); +} + +/** + * xmlDelEncodingAlias: + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Unregisters an encoding alias @alias + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlDelEncodingAlias(const char *alias) { + int i; + + if (alias == NULL) + return(-1); + + if (xmlCharEncodingAliases == NULL) + return(-1); + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { + xmlFree((char *) xmlCharEncodingAliases[i].name); + xmlFree((char *) xmlCharEncodingAliases[i].alias); + xmlCharEncodingAliasesNb--; + memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], + sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); + return(0); + } + } + return(-1); +} + +/** + * xmlParseCharEncoding: + * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) + * + * Compare the string to the known encoding schemes already known. Note + * that the comparison is case insensitive accordingly to the section + * [XML] 4.3.3 Character Encoding in Entities. + * + * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE + * if not recognized. 
+ */ +xmlCharEncoding +xmlParseCharEncoding(const char* name) +{ + const char *alias; + char upper[500]; + int i; + + if (name == NULL) + return(XML_CHAR_ENCODING_NONE); + + /* + * Do the alias resolution + */ + alias = xmlGetEncodingAlias(name); + if (alias != NULL) + name = alias; + + for (i = 0;i < 499;i++) { + upper[i] = toupper(name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); + if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); + if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); + + /* + * NOTE: if we were able to parse this, the endianness of UTF16 is + * already found and in use + */ + if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); + if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); + + if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); + if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); + if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); + + /* + * NOTE: if we were able to parse this, the endianness of UCS4 is + * already found and in use + */ + if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); + if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); + if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); + + + if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); + if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); + if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); + + if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); + if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); + if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); + + if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3); + if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); + if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); + if (!strcmp(upper, 
"ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); + if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); + if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); + if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); + + if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); + if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); + if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); + +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); +#endif + return(XML_CHAR_ENCODING_ERROR); +} + +/** + * xmlGetCharEncodingName: + * @enc: the encoding + * + * The "canonical" name for XML encoding. + * C.f. http://www.w3.org/TR/REC-xml#charencoding + * Section 4.3.3 Character Encoding in Entities + * + * Returns the canonical name for the given encoding + */ + +const char* +xmlGetCharEncodingName(xmlCharEncoding enc) { + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + return(NULL); + case XML_CHAR_ENCODING_NONE: + return(NULL); + case XML_CHAR_ENCODING_UTF8: + return("UTF-8"); + case XML_CHAR_ENCODING_UTF16LE: + return("UTF-16"); + case XML_CHAR_ENCODING_UTF16BE: + return("UTF-16"); + case XML_CHAR_ENCODING_EBCDIC: + return("EBCDIC"); + case XML_CHAR_ENCODING_UCS4LE: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS4BE: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS4_2143: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS4_3412: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS2: + return("ISO-10646-UCS-2"); + case XML_CHAR_ENCODING_8859_1: + return("ISO-8859-1"); + case XML_CHAR_ENCODING_8859_2: + return("ISO-8859-2"); + case XML_CHAR_ENCODING_8859_3: + return("ISO-8859-3"); + case XML_CHAR_ENCODING_8859_4: + return("ISO-8859-4"); + case XML_CHAR_ENCODING_8859_5: + return("ISO-8859-5"); + case XML_CHAR_ENCODING_8859_6: + return("ISO-8859-6"); + case XML_CHAR_ENCODING_8859_7: + return("ISO-8859-7"); + case 
XML_CHAR_ENCODING_8859_8: + return("ISO-8859-8"); + case XML_CHAR_ENCODING_8859_9: + return("ISO-8859-9"); + case XML_CHAR_ENCODING_2022_JP: + return("ISO-2022-JP"); + case XML_CHAR_ENCODING_SHIFT_JIS: + return("Shift-JIS"); + case XML_CHAR_ENCODING_EUC_JP: + return("EUC-JP"); + case XML_CHAR_ENCODING_ASCII: + return(NULL); + } + return(NULL); +} + +/************************************************************************ + * * + * Char encoding handlers * + * * + ************************************************************************/ + + +/* the size should be growable, but it's not a big deal ... */ +#define MAX_ENCODING_HANDLERS 50 +static xmlCharEncodingHandlerPtr *handlers = NULL; +static int nbCharEncodingHandler = 0; + +/* + * The default is UTF-8 for XML, that's also the default used for the + * parser internals, so the default encoding handler is NULL + */ + +static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; + +/** + * xmlNewCharEncodingHandler: + * @name: the encoding name, in UTF-8 format (ASCII actually) + * @input: the xmlCharEncodingInputFunc to read that encoding + * @output: the xmlCharEncodingOutputFunc to write that encoding + * + * Create and registers an xmlCharEncodingHandler. + * + * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). + */ +xmlCharEncodingHandlerPtr +xmlNewCharEncodingHandler(const char *name, + xmlCharEncodingInputFunc input, + xmlCharEncodingOutputFunc output) { + xmlCharEncodingHandlerPtr handler; + const char *alias; + char upper[500]; + int i; + char *up = 0; + + /* + * Do the alias resolution + */ + alias = xmlGetEncodingAlias(name); + if (alias != NULL) + name = alias; + + /* + * Keep only the uppercase version of the encoding. 
+ */ + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharEncodingHandler : no name !\n"); + return(NULL); + } + for (i = 0;i < 499;i++) { + upper[i] = toupper(name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + up = xmlMemStrdup(upper); + if (up == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharEncodingHandler : out of memory !\n"); + return(NULL); + } + + /* + * allocate and fill-up an handler block. + */ + handler = (xmlCharEncodingHandlerPtr) + xmlMalloc(sizeof(xmlCharEncodingHandler)); + if (handler == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharEncodingHandler : out of memory !\n"); + return(NULL); + } + handler->input = input; + handler->output = output; + handler->name = up; + +#ifdef LIBXML_ICONV_ENABLED + handler->iconv_in = NULL; + handler->iconv_out = NULL; +#endif /* LIBXML_ICONV_ENABLED */ + + /* + * registers and returns the handler. + */ + xmlRegisterCharEncodingHandler(handler); +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Registered encoding handler for %s\n", name); +#endif + return(handler); +} + +/** + * xmlInitCharEncodingHandlers: + * + * Initialize the char encoding support, it registers the default + * encoding supported. + * NOTE: while public, this function usually doesn't need to be called + * in normal processing. 
+ */ +void +xmlInitCharEncodingHandlers(void) { + unsigned short int tst = 0x1234; + unsigned char *ptr = (unsigned char *) &tst; + + if (handlers != NULL) return; + + handlers = (xmlCharEncodingHandlerPtr *) + xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); + + if (*ptr == 0x12) xmlLittleEndian = 0; + else if (*ptr == 0x34) xmlLittleEndian = 1; + else xmlGenericError(xmlGenericErrorContext, + "Odd problem at endianness detection\n"); + + if (handlers == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitCharEncodingHandlers : out of memory !\n"); + return; + } + xmlNewCharEncodingHandler("UTF-8", NULL, NULL); + xmlUTF16LEHandler = + xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); + xmlUTF16BEHandler = + xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); + xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); + xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); + xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); +#ifdef LIBXML_HTML_ENABLED + xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); +#endif +} + +/** + * xmlCleanupCharEncodingHandlers: + * + * Cleanup the memory allocated for the char encoding support, it + * unregisters all the encoding handlers and the aliases. + */ +void +xmlCleanupCharEncodingHandlers(void) { + xmlCleanupEncodingAliases(); + + if (handlers == NULL) return; + + for (;nbCharEncodingHandler > 0;) { + nbCharEncodingHandler--; + if (handlers[nbCharEncodingHandler] != NULL) { + if (handlers[nbCharEncodingHandler]->name != NULL) + xmlFree(handlers[nbCharEncodingHandler]->name); + xmlFree(handlers[nbCharEncodingHandler]); + } + } + xmlFree(handlers); + handlers = NULL; + nbCharEncodingHandler = 0; + xmlDefaultCharEncodingHandler = NULL; +} + +/** + * xmlRegisterCharEncodingHandler: + * @handler: the xmlCharEncodingHandlerPtr handler block + * + * Register the char encoding handler, surprising, isn't it ? 
+ */
+void
+xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
+    if (handlers == NULL) xmlInitCharEncodingHandlers();
+    if (handler == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+            "xmlRegisterCharEncodingHandler: NULL handler !\n");
+        return;
+    }
+    /*
+     * xmlInitCharEncodingHandlers() leaves the table NULL (and has
+     * already reported it) when its allocation fails; storing into it
+     * would dereference a NULL pointer.
+     */
+    if (handlers == NULL)
+        return;
+
+    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
+        xmlGenericError(xmlGenericErrorContext,
+        "xmlRegisterCharEncodingHandler: Too many handler registered\n");
+        xmlGenericError(xmlGenericErrorContext,
+                "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
+        return;
+    }
+    handlers[nbCharEncodingHandler++] = handler;
+}
+
+/*
+ * xmlFindFirstCharEncodingHandler:
+ * @names: a NULL terminated list of encoding names, by order of preference
+ *
+ * Try xmlFindCharEncodingHandler() on each name of the list in turn.
+ *
+ * Returns the first handler found or NULL if no name of the list has one
+ */
+static xmlCharEncodingHandlerPtr
+xmlFindFirstCharEncodingHandler(const char * const *names) {
+    xmlCharEncodingHandlerPtr handler;
+
+    for (;*names != NULL;names++) {
+        handler = xmlFindCharEncodingHandler(*names);
+        if (handler != NULL) return(handler);
+    }
+    return(NULL);
+}
+
+/**
+ * xmlGetCharEncodingHandler:
+ * @enc: an xmlCharEncoding value.
+ *
+ * Search in the registered set the handler able to read/write that encoding.
+ * The UTF-16 handlers are returned directly; no handler is returned for
+ * UTF-8, for XML_CHAR_ENCODING_NONE and for XML_CHAR_ENCODING_ERROR. All
+ * the other encodings are looked up by name, trying the usual spellings
+ * of the encoding name in order.
+ *
+ * Returns the handler or NULL if not found
+ */
+xmlCharEncodingHandlerPtr
+xmlGetCharEncodingHandler(xmlCharEncoding enc) {
+    /* spellings tried, in that order, for the encodings having several */
+    static const char * const ebcdic[] = { "EBCDIC", "ebcdic", NULL };
+    static const char * const ucs4[] =
+        { "ISO-10646-UCS-4", "UCS-4", "UCS4", NULL };
+    static const char * const ucs2[] =
+        { "ISO-10646-UCS-2", "UCS-2", "UCS2", NULL };
+    static const char * const shiftjis[] =
+        { "SHIFT-JIS", "SHIFT_JIS", "Shift_JIS", NULL };
+    xmlCharEncodingHandlerPtr handler = NULL;
+
+    if (handlers == NULL) xmlInitCharEncodingHandlers();
+    switch (enc) {
+        case XML_CHAR_ENCODING_ERROR:
+        case XML_CHAR_ENCODING_NONE:
+        case XML_CHAR_ENCODING_UTF8:
+            return(NULL);
+        case XML_CHAR_ENCODING_UTF16LE:
+            return(xmlUTF16LEHandler);
+        case XML_CHAR_ENCODING_UTF16BE:
+            return(xmlUTF16BEHandler);
+        case XML_CHAR_ENCODING_EBCDIC:
+            handler = xmlFindFirstCharEncodingHandler(ebcdic);
+            break;
+        case XML_CHAR_ENCODING_UCS4BE:
+        case XML_CHAR_ENCODING_UCS4LE:
+            handler = xmlFindFirstCharEncodingHandler(ucs4);
+            break;
+        case XML_CHAR_ENCODING_UCS4_2143:
+        case XML_CHAR_ENCODING_UCS4_3412:
+            /* unusual byte orders: no handler is looked up */
+            break;
+        case XML_CHAR_ENCODING_UCS2:
+            handler = xmlFindFirstCharEncodingHandler(ucs2);
+            break;
+
+        /*
+         * We used to keep ISO Latin encodings native in the
+         * generated data. This led to so many problems that
+         * this has been removed. One can still change this
+         * back by registering no-ops encoders for those
+         */
+        case XML_CHAR_ENCODING_8859_1:
+            handler = xmlFindCharEncodingHandler("ISO-8859-1");
+            break;
+        case XML_CHAR_ENCODING_8859_2:
+            handler = xmlFindCharEncodingHandler("ISO-8859-2");
+            break;
+        case XML_CHAR_ENCODING_8859_3:
+            handler = xmlFindCharEncodingHandler("ISO-8859-3");
+            break;
+        case XML_CHAR_ENCODING_8859_4:
+            handler = xmlFindCharEncodingHandler("ISO-8859-4");
+            break;
+        case XML_CHAR_ENCODING_8859_5:
+            handler = xmlFindCharEncodingHandler("ISO-8859-5");
+            break;
+        case XML_CHAR_ENCODING_8859_6:
+            handler = xmlFindCharEncodingHandler("ISO-8859-6");
+            break;
+        case XML_CHAR_ENCODING_8859_7:
+            handler = xmlFindCharEncodingHandler("ISO-8859-7");
+            break;
+        case XML_CHAR_ENCODING_8859_8:
+            handler = xmlFindCharEncodingHandler("ISO-8859-8");
+            break;
+        case XML_CHAR_ENCODING_8859_9:
+            handler = xmlFindCharEncodingHandler("ISO-8859-9");
+            break;
+
+        case XML_CHAR_ENCODING_2022_JP:
+            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
+            break;
+        case XML_CHAR_ENCODING_SHIFT_JIS:
+            handler = xmlFindFirstCharEncodingHandler(shiftjis);
+            break;
+        case XML_CHAR_ENCODING_EUC_JP:
+            handler = xmlFindCharEncodingHandler("EUC-JP");
+            break;
+        default:
+            break;
+    }
+    if (handler != NULL) return(handler);
+
+#ifdef DEBUG_ENCODING
+    xmlGenericError(xmlGenericErrorContext,
+            "No handler found for encoding %d\n", enc);
+#endif
+    return(NULL);
+}
+
+/**
+ * xmlFindCharEncodingHandler:
+ * @name: a string describing the char encoding.
+ *
+ * Search in the registered set the handler able to read/write that encoding.
+ * The name is first resolved through the aliases, then compared in upper
+ * case (only the first 99 characters are considered) to the names of the
+ * registered handlers. When iconv support is compiled in and no handler
+ * is registered, a new handler wrapping the two iconv converters is
+ * allocated. As a last resort the lookup is retried with the canonical
+ * name of the encoding.
+ *
+ * Returns the handler or NULL if not found, the default handler is
+ * returned for a NULL or empty @name
+ */
+xmlCharEncodingHandlerPtr
+xmlFindCharEncodingHandler(const char *name) {
+    const char *nalias;
+    const char *norig;
+    xmlCharEncoding alias;
+#ifdef LIBXML_ICONV_ENABLED
+    xmlCharEncodingHandlerPtr enc;
+    iconv_t icv_in, icv_out;
+#endif /* LIBXML_ICONV_ENABLED */
+    char upper[100];
+    int i;
+
+    if (handlers == NULL) xmlInitCharEncodingHandlers();
+    if (name == NULL) return(xmlDefaultCharEncodingHandler);
+    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
+
+    /*
+     * Do the alias resolution
+     */
+    norig = name;
+    nalias = xmlGetEncodingAlias(name);
+    if (nalias != NULL)
+        name = nalias;
+
+    /*
+     * Check first for directly registered encoding names.
+     * toupper() is only defined for values representable as an
+     * unsigned char (or EOF), hence the cast of the plain char.
+     */
+    for (i = 0;i < 99;i++) {
+        upper[i] = toupper((unsigned char) name[i]);
+        if (upper[i] == 0) break;
+    }
+    upper[i] = 0;
+
+    for (i = 0;i < nbCharEncodingHandler; i++)
+        if (!strcmp(upper, handlers[i]->name)) {
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                    "Found registered handler for encoding %s\n", name);
+#endif
+            return(handlers[i]);
+        }
+
+#ifdef LIBXML_ICONV_ENABLED
+    /* check whether iconv can handle this */
+    icv_in = iconv_open("UTF-8", name);
+    icv_out = iconv_open(name, "UTF-8");
+    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
+        enc = (xmlCharEncodingHandlerPtr)
+              xmlMalloc(sizeof(xmlCharEncodingHandler));
+        if (enc == NULL) {
+            iconv_close(icv_in);
+            iconv_close(icv_out);
+            return(NULL);
+        }
+        enc->name = xmlMemStrdup(name);
+        if (enc->name == NULL) {
+            /*
+             * xmlCharEncCloseFunc() refuses handlers without a name,
+             * such a handler could never be released.
+             */
+            iconv_close(icv_in);
+            iconv_close(icv_out);
+            xmlFree(enc);
+            return(NULL);
+        }
+        enc->input = NULL;
+        enc->output = NULL;
+        enc->iconv_in = icv_in;
+        enc->iconv_out = icv_out;
+#ifdef DEBUG_ENCODING
+        xmlGenericError(xmlGenericErrorContext,
+                "Found iconv handler for encoding %s\n", name);
+#endif
+        return enc;
+    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
+        xmlGenericError(xmlGenericErrorContext,
+                "iconv : problems with filters for '%s'\n", name);
+        /*
+         * Only one direction could be opened, release it instead of
+         * leaking the conversion descriptor.
+         */
+        if (icv_in != (iconv_t) -1)
+            iconv_close(icv_in);
+        if (icv_out != (iconv_t) -1)
+            iconv_close(icv_out);
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+
+#ifdef DEBUG_ENCODING
+    xmlGenericError(xmlGenericErrorContext,
+            "No handler found for encoding %s\n", name);
+#endif
+
+    /*
+     * Fallback using the canonical names
+     */
+    alias = xmlParseCharEncoding(norig);
+    if (alias != XML_CHAR_ENCODING_ERROR) {
+        const char* canon;
+        canon = xmlGetCharEncodingName(alias);
+        /* recurse only on a different name, to avoid looping forever */
+        if ((canon != NULL) && (strcmp(name, canon))) {
+            return(xmlFindCharEncodingHandler(canon));
+        }
+    }
+
+    return(NULL);
+}
+
+/************************************************************************
+ *									*
+ *		ICONV based generic conversion functions		*
+ *									*
+ ************************************************************************/
+
+#ifdef LIBXML_ICONV_ENABLED
+/**
+ * xmlIconvWrapper:
+ * @cd: iconv converter data structure
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @in: a pointer to an array of ISO Latin 1 chars
+ * @inlen: the length of @in
+ *
+ * Returns 0 if success, or
+ * -1 by lack of space, or
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want), or
+ * -3 if there the
+ *     last byte can't form a single output char.
+ *
+ * The value of @inlen after return is the number of octets consumed
+ * as the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
+ * Both are set to 0 when @in is NULL.
+ */
+static int
+xmlIconvWrapper(iconv_t cd,
+        unsigned char *out, int *outlen,
+        const unsigned char *in, int *inlen) {
+
+    size_t icv_inlen = *inlen, icv_outlen = *outlen;	/* iconv() counts with size_t, not int */
+    const char *icv_in = (const char *) in;
+    char *icv_out = (char *) out;
+    int ret;
+
+    ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
+    if (in != NULL) {
+        *inlen -= icv_inlen;	/* icv_inlen is what is left to read: keep what was consumed */
+        *outlen -= icv_outlen;	/* icv_outlen is the room left: keep what was produced */
+    } else {
+        *inlen = 0;
+        *outlen = 0;
+    }
+    if ((icv_inlen != 0) || (ret == -1)) {	/* input left unconverted or iconv() failure */
+#ifdef EILSEQ
+        if (errno == EILSEQ) {
+            return -2;	/* reported as a transcoding failure */
+        } else
+#endif
+#ifdef E2BIG
+        if (errno == E2BIG) {
+            return -1;	/* reported as a lack of space */
+        } else
+#endif
+#ifdef EINVAL
+        if (errno == EINVAL) {
+            return -3;	/* reported as an incomplete last char */
+        } else
+#endif
+        {
+            return -3;	/* any other errno value is reported the same way */
+        }
+    }
+    return 0;
+}
+#endif /* LIBXML_ICONV_ENABLED */
+
+/************************************************************************
+ *									*
+ *		The real API used by libxml for on-the-fly conversion	*
+ *									*
+ ************************************************************************/
+
+/**
+ * xmlCharEncFirstLine:
+ * @handler: char encoding transformation data structure
+ * @out: an xmlBuffer for the output.
+ * @in: an xmlBuffer for the input
+ *
+ * Front-end for the encoding handler input function, but handle only
+ * the very first line, i.e. limit itself to 45 chars.
+ *
+ * At most 45 bytes are produced in @out, which is grown first if it
+ * does not have that much room. The bytes consumed are removed from @in.
+ *
+ * Returns the (zero or positive) value reported by the converter if
+ *     success; a conversion stopped by lack of space or on an incomplete
+ *     character is reported as 0 too, or
+ *     -1 general error (NULL argument, no room available in @out)
+ *     -2 if the transcoding fails (for *in is not valid utf8 string or
+ *        the result of transformation can't fit into the encoding we want)
+ *        or if the handler has no input converter
+ */
+int
+xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+                 xmlBufferPtr in) {
+    int ret = -2;
+    int written;
+    int toconv;
+
+    if (handler == NULL) return(-1);
+    if (out == NULL) return(-1);
+    if (in == NULL) return(-1);
+
+    toconv = in->use;
+    /* one byte is always kept for the terminating 0 */
+    written = out->size - out->use - 1;
+    if ((toconv * 2 >= written) || (written < 45)) {
+        /* NOTE(review): assumes xmlBufferGrow(buf, len) makes at least
+         * len bytes available after buf->use - confirm in tree.c */
+        xmlBufferGrow(out, toconv + 46);
+        written = out->size - out->use - 1;
+    }
+    if (written <= 0) return(-1);
+
+    /*
+     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
+     * 45 chars should be sufficient to reach the end of the encoding
+     * declaration without going too far inside the document content.
+     * The limit is a maximum: never announce more room to the converter
+     * than the output buffer really has.
+     */
+    if (written > 45)
+        written = 45;
+
+    if (handler->input != NULL) {
+        ret = handler->input(&out->content[out->use], &written,
+                             in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+    }
+#ifdef LIBXML_ICONV_ENABLED
+    else if (handler->iconv_in != NULL) {
+        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
+                              &written, in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+        if (ret == -1) ret = -3;
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+    switch (ret) {
+        case 0:
+            xmlGenericError(xmlGenericErrorContext,
+                    "converted %d bytes to %d bytes of input\n",
+                    toconv, written);
+            break;
+        case -1:
+            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
+                    toconv, written, in->use);
+            break;
+        case -2:
+            xmlGenericError(xmlGenericErrorContext,
+                    "input conversion failed due to input error\n");
+            break;
+        case -3:
+            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
+                    toconv, written, in->use);
+            break;
+        default:
+            xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
+    }
+#endif /* DEBUG_ENCODING */
+    /*
+     * Ignore when input buffer is not on a boundary
+     */
+    if (ret == -3) ret = 0;
+    if (ret == -1) ret = 0;
+    return(ret);
+}
+
+/**
+ * xmlCharEncInFunc:
+ * @handler: char encoding transformation data structure
+ * @out: an xmlBuffer for the output.
+ * @in: an xmlBuffer for the input
+ *
+ * Generic front-end for the encoding handler input function.
+ * The bytes consumed are removed from @in, the bytes produced are
+ * appended to @out which is kept zero terminated.
+ *
+ * Returns the number of byte written if success (0 if @in is empty or
+ *     if nothing could be converted yet), or
+ *     -1 general error (NULL argument)
+ *     -2 if nothing was written and the transcoding fails (for *in is not
+ *        valid utf8 string or the result of transformation can't fit into
+ *        the encoding we want) or the handler has no input converter
+ */
+int
+xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
+                 xmlBufferPtr in)
+{
+    int ret = -2;
+    int written;
+    int toconv;
+
+    if (handler == NULL)
+        return (-1);
+    if (out == NULL)
+        return (-1);
+    if (in == NULL)
+        return (-1);
+
+    toconv = in->use;
+    if (toconv == 0)
+        return (0);
+    written = out->size - out->use;
+    if (toconv * 2 >= written) {
+        xmlBufferGrow(out, out->size + toconv * 2);
+        written = out->size - out->use - 1;
+    }
+    if (handler->input != NULL) {
+        ret = handler->input(&out->content[out->use], &written,
+                             in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+    }
+#ifdef LIBXML_ICONV_ENABLED
+    else if (handler->iconv_in != NULL) {
+        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
+                              &written, in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+        if (ret == -1)
+            ret = -3;
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+    else {
+        /*
+         * No converter at all: nothing was written, do not report the
+         * free space of @out as a number of converted bytes.
+         */
+        written = 0;
+    }
+    switch (ret) {
+        case 0:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                            "converted %d bytes to %d bytes of input\n",
+                            toconv, written);
+#endif
+            break;
+        case -1:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                         "converted %d bytes to %d bytes of input, %d left\n",
+                            toconv, written, in->use);
+#endif
+            break;
+        case -3:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                        "converted %d bytes to %d bytes of input, %d left\n",
+                            toconv, written, in->use);
+#endif
+            break;
+        case -2:
+            xmlGenericError(xmlGenericErrorContext,
+                            "input conversion failed due to input error\n");
+            xmlGenericError(xmlGenericErrorContext,
+                            "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                            in->content[0], in->content[1],
+                            in->content[2], in->content[3]);
+            break;
+        default:
+            break;
+    }
+    /*
+     * Ignore when input buffer is not on a boundary or when the output
+     * was too small: the remaining input is converted by the next call.
+     */
+    if ((ret == -3) || (ret == -1))
+        ret = 0;
+    /*
+     * Bytes already converted are always reported, a transcoding failure
+     * is only reported once nothing could be written anymore.
+     */
+    return (written ? written : ret);
+}
+
+/**
+ * xmlCharEncOutFunc:
+ * @handler: char encoding transformation data structure
+ * @out: an xmlBuffer for the output.
+ * @in: an xmlBuffer for the input
+ *
+ * Generic front-end for the encoding handler output function
+ * a first call with @in == NULL has to be made first to initiate the
+ * output in case of non-stateless encoding needing to initiate their
+ * state or the output (like the BOM in UTF16).
+ * In case of UTF8 sequence conversion errors for the given encoder,
+ * the content will be automatically remapped to a CharRef sequence.
+ *
+ * Returns 0 for the initialization call and for an empty @in, else the
+ *     value reported by the converter, or
+ *     -1 general error (NULL argument, no output converter)
+ *     -2 if the transcoding fails (for *in is not valid utf8 string or
+ *        the result of transformation can't fit into the encoding we want)
+ *     -3 if iconv stopped without having written anything
+ */
+int
+xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+                  xmlBufferPtr in) {
+    int ret = -2;
+    int written;
+    int toconv;
+
+    if (handler == NULL) return(-1);
+    if (out == NULL) return(-1);
+
+retry:
+
+    written = out->size - out->use;
+
+    /*
+     * First specific handling of in = NULL, i.e. the initialization call
+     */
+    if (in == NULL) {
+        toconv = 0;
+        if (handler->output != NULL) {
+            ret = handler->output(&out->content[out->use], &written,
+                                  NULL, &toconv);
+            out->use += written;
+            out->content[out->use] = 0;
+        }
+#ifdef LIBXML_ICONV_ENABLED
+        else if (handler->iconv_out != NULL) {
+            ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
+                                  &written, NULL, &toconv);
+            out->use += written;
+            out->content[out->use] = 0;
+        }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+        xmlGenericError(xmlGenericErrorContext,
+                "initialized encoder\n");
+#endif
+        return(0);
+    }
+
+    /*
+     * Conversion itself.
+     */
+    toconv = in->use;
+    if (toconv == 0)
+        return(0);
+    if (toconv * 2 >= written) {
+        xmlBufferGrow(out, toconv * 2);
+        written = out->size - out->use - 1;
+    }
+    if (handler->output != NULL) {
+        ret = handler->output(&out->content[out->use], &written,
+                              in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+    }
+#ifdef LIBXML_ICONV_ENABLED
+    else if (handler->iconv_out != NULL) {
+        ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
+                              &written, in->content, &toconv);
+        xmlBufferShrink(in, toconv);
+        out->use += written;
+        out->content[out->use] = 0;
+        if (ret == -1) {
+            if (written > 0) {
+                /*
+                 * Can be a limitation of iconv
+                 */
+                goto retry;
+            }
+            ret = -3;
+        }
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+    else {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlCharEncOutFunc: no output function !\n");
+        return(-1);
+    }
+
+    /*
+     * Attempt to handle error cases
+     */
+    switch (ret) {
+        case 0:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                    "converted %d bytes to %d bytes of output\n",
+                    toconv, written);
+#endif
+            break;
+        case -1:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                    "output conversion failed by lack of space\n");
+#endif
+            break;
+        case -3:
+            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
+                    toconv, written, in->use);
+            break;
+        case -2: {
+            int len = in->use;
+            const xmlChar *utf = (const xmlChar *) in->content;
+            int cur;
+
+            cur = xmlGetUTF8Char(utf, &len);
+            if (cur > 0) {
+                xmlChar charref[20];
+
+#ifdef DEBUG_ENCODING
+                xmlGenericError(xmlGenericErrorContext,
+                        "handling output conversion error\n");
+                xmlGenericError(xmlGenericErrorContext,
+                        "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                        in->content[0], in->content[1],
+                        in->content[2], in->content[3]);
+#endif
+                /*
+                 * Removes the UTF8 sequence, and replace it by a charref
+                 * and continue the transcoding phase, hoping the error
+                 * did not mangle the encoder state.
+                 */
+                snprintf((char *) charref, sizeof(charref), "&#%d;", cur);
+                xmlBufferShrink(in, len);
+                xmlBufferAddHead(in, charref, -1);
+
+                goto retry;
+            } else {
+                xmlGenericError(xmlGenericErrorContext,
+                        "output conversion failed due to conv error\n");
+                xmlGenericError(xmlGenericErrorContext,
+                        "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                        in->content[0], in->content[1],
+                        in->content[2], in->content[3]);
+                /* blank the offending byte of the input buffer */
+                in->content[0] = ' ';
+            }
+            break;
+        }
+        default:
+            break;
+    }
+    return(ret);
+}
+
+/**
+ * xmlCharEncCloseFunc:
+ * @handler: char encoding transformation data structure
+ *
+ * Generic front-end for encoding handler close function.
+ * Only the handlers holding iconv converters are released (and only
+ * when iconv support is compiled in), the other ones are left untouched.
+ *
+ * Returns 0 if success, or -1 in case of error
+ */
+int
+xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
+    int ret = 0;
+    if (handler == NULL) return(-1);
+    if (handler->name == NULL) return(-1);
+#ifdef LIBXML_ICONV_ENABLED
+    /*
+     * Iconv handlers can be used only once, free the whole block.
+     * and the associated iconv resources.
+     */
+    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
+        if (handler->name != NULL)
+            xmlFree(handler->name);
+        handler->name = NULL;
+        if (handler->iconv_out != NULL) {
+            if (iconv_close(handler->iconv_out))
+                ret = -1;
+            handler->iconv_out = NULL;
+        }
+        if (handler->iconv_in != NULL) {
+            if (iconv_close(handler->iconv_in))
+                ret = -1;
+            handler->iconv_in = NULL;
+        }
+        xmlFree(handler);
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+    if (ret)
+        xmlGenericError(xmlGenericErrorContext,
+                "failed to close the encoding handler\n");
+    else
+        xmlGenericError(xmlGenericErrorContext,
+                "closed the encoding handler\n");
+#endif
+
+    return(ret);
+}
+
diff --git a/bundle/libxml/entities.c b/bundle/libxml/entities.c
new file mode 100644
index 0000000000..dbdf9ebf32
--- /dev/null
+++ b/bundle/libxml/entities.c
@@ -0,0 +1,1006 @@
+/*
+ * entities.c : implementation for the XML entities handling
+ *
+ * See Copyright for the status of this software.
+ *
+ * daniel@veillard.com
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#include <string.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <libxml/xmlmemory.h>
+#include <libxml/hash.h>
+#include <libxml/entities.h>
+#include <libxml/parser.h>
+#include <libxml/xmlerror.h>
+#include <libxml/globals.h>
+
+/*
+ * The XML predefined entities.
+ */
+
+struct xmlPredefinedEntityValue {
+    const char *name;
+    const char *value;
+};
+static struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
+    { "lt", "<" },
+    { "gt", ">" },
+    { "apos", "'" },
+    { "quot", "\"" },
+    { "amp", "&" }
+};
+
+/*
+ * TODO: This is GROSS, allocation of a 256 entry hash for
+ * a fixed number of 4 elements !
+ */
+static xmlHashTablePtr xmlPredefinedEntities = NULL;
+
+/*
+ * xmlFreeEntity : clean-up an entity record.
+ *
+ * The children nodes are released only when they belong to this entity
+ * (i.e. when the parent of the first child is the entity itself); all
+ * the strings owned by the record are released, then the record.
+ */
+static void xmlFreeEntity(xmlEntityPtr entity) {
+    if (entity == NULL) return;
+
+    if ((entity->children) &&
+        (entity == (xmlEntityPtr) entity->children->parent))
+        xmlFreeNodeList(entity->children);
+    if (entity->name != NULL)
+        xmlFree((char *) entity->name);
+    if (entity->ExternalID != NULL)
+        xmlFree((char *) entity->ExternalID);
+    if (entity->SystemID != NULL)
+        xmlFree((char *) entity->SystemID);
+    if (entity->URI != NULL)
+        xmlFree((char *) entity->URI);
+    if (entity->content != NULL)
+        xmlFree((char *) entity->content);
+    if (entity->orig != NULL)
+        xmlFree((char *) entity->orig);
+    xmlFree(entity);
+}
+
+/*
+ * xmlAddEntity : register a new entity for an entities table.
+ *
+ * The table is selected from @type: general entities and parameter
+ * entities go to the corresponding table of @dtd (created on first use),
+ * predefined entities go to the global table and do not need a @dtd.
+ * @name, @ExternalID, @SystemID and @content are copied.
+ *
+ * Returns the new entity, or NULL if @name is NULL, if @dtd is NULL while
+ * @type needs one, if @type is unknown, if the allocation fails or if an
+ * entity of that name is already in the table.
+ */
+static xmlEntityPtr
+xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
+	  const xmlChar *ExternalID, const xmlChar *SystemID,
+	  const xmlChar *content) {
+    xmlEntitiesTablePtr table = NULL;
+    xmlEntityPtr ret;
+
+    if (name == NULL)
+        return(NULL);
+    switch (type) {
+        case XML_INTERNAL_GENERAL_ENTITY:
+        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            /* these tables live in the DTD, which is then required */
+            if (dtd == NULL)
+                return(NULL);
+            if (dtd->entities == NULL)
+                dtd->entities = xmlHashCreate(0);
+            table = dtd->entities;
+            break;
+        case XML_INTERNAL_PARAMETER_ENTITY:
+        case XML_EXTERNAL_PARAMETER_ENTITY:
+            if (dtd == NULL)
+                return(NULL);
+            if (dtd->pentities == NULL)
+                dtd->pentities = xmlHashCreate(0);
+            table = dtd->pentities;
+            break;
+        case XML_INTERNAL_PREDEFINED_ENTITY:
+            if (xmlPredefinedEntities == NULL)
+                xmlPredefinedEntities = xmlHashCreate(8);
+            table = xmlPredefinedEntities;
+            break;
+        default:
+            /* unknown type: table stays NULL and is rejected below */
+            break;
+    }
+    if (table == NULL)
+        return(NULL);
+    ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
+    if (ret == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddEntity: out of memory\n");
+        return(NULL);
+    }
+    memset(ret, 0, sizeof(xmlEntity));
+    ret->type = XML_ENTITY_DECL;
+
+    /*
+     * fill the structure.
+     */
+    ret->name = xmlStrdup(name);
+    ret->etype = (xmlEntityType) type;
+    if (ExternalID != NULL)
+        ret->ExternalID = xmlStrdup(ExternalID);
+    if (SystemID != NULL)
+        ret->SystemID = xmlStrdup(SystemID);
+    if (content != NULL) {
+        ret->length = xmlStrlen(content);
+        ret->content = xmlStrndup(content, ret->length);
+     } else {
+        ret->length = 0;
+        ret->content = NULL;
+    }
+    ret->URI = NULL; /* to be computed by the layer knowing
+			the defining entity */
+    ret->orig = NULL;
+
+    if (xmlHashAddEntry(table, name, ret)) {
+        /*
+         * entity was already defined at another level.
+         */
+        xmlFreeEntity(ret);
+        return(NULL);
+    }
+    return(ret);
+}
+
+/**
+ * xmlInitializePredefinedEntities:
+ *
+ * Set up the predefined entities: the global table is created and filled
+ * from xmlPredefinedEntityValues. Nothing is done if the table exists.
+ */
+void xmlInitializePredefinedEntities(void) {
+    unsigned int i;
+
+    if (xmlPredefinedEntities != NULL) return;
+
+    xmlPredefinedEntities = xmlCreateEntitiesTable();
+    for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
+                   sizeof(xmlPredefinedEntityValues[0]);i++) {
+        /*
+         * xmlAddEntity() copies both strings, so the constants can be
+         * handed over directly, without going through fixed size
+         * intermediate buffers.
+         */
+        xmlAddEntity(NULL,
+                     (const xmlChar *) xmlPredefinedEntityValues[i].name,
+                     XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
+                     (const xmlChar *) xmlPredefinedEntityValues[i].value);
+    }
+}
+
+/**
+ * xmlCleanupPredefinedEntities:
+ *
+ * Cleanup up the predefined entities table.
+ */
+void xmlCleanupPredefinedEntities(void) {
+    if (xmlPredefinedEntities == NULL) return;
+
+    xmlFreeEntitiesTable(xmlPredefinedEntities);
+    xmlPredefinedEntities = NULL;
+}
+
+/**
+ * xmlGetPredefinedEntity:
+ * @name: the entity name
+ *
+ * Check whether this name is an predefined entity.
+ * The predefined entities table is set up on first use.
+ *
+ * Returns NULL if not, otherwise the entity
+ */
+xmlEntityPtr
+xmlGetPredefinedEntity(const xmlChar *name) {
+    xmlEntityPtr ent;
+
+    if (xmlPredefinedEntities == NULL) {
+        xmlInitializePredefinedEntities();
+    }
+    ent = (xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name);
+    return(ent);
+}
+
+/**
+ * xmlAddDtdEntity:
+ * @doc:  the document
+ * @name:  the entity name
+ * @type:  the entity type XML_xxx_yyy_ENTITY
+ * @ExternalID:  the entity external ID if available
+ * @SystemID:  the entity system ID if available
+ * @content:  the entity content
+ *
+ * Create an entity in the external subset of the document DTD and
+ * append it to the list of nodes of that subset.
+ *
+ * Returns the new entity, or NULL if the document is NULL, has no
+ * external subset, or if the entity could not be added
+ */
+xmlEntityPtr
+xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
+	        const xmlChar *ExternalID, const xmlChar *SystemID,
+		const xmlChar *content) {
+    xmlDtdPtr subset;
+    xmlEntityPtr ent;
+
+    if (doc == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDtdEntity: doc == NULL !\n");
+        return(NULL);
+    }
+    subset = doc->extSubset;
+    if (subset == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDtdEntity: document without external subset !\n");
+        return(NULL);
+    }
+    ent = xmlAddEntity(subset, name, type, ExternalID, SystemID, content);
+    if (ent == NULL)
+        return(NULL);
+
+    /*
+     * Hook the entity at the end of the children of the subset
+     */
+    ent->parent = subset;
+    ent->doc = subset->doc;
+    if (subset->last != NULL) {
+        subset->last->next = (xmlNodePtr) ent;
+        ent->prev = subset->last;
+    } else {
+        subset->children = (xmlNodePtr) ent;
+    }
+    subset->last = (xmlNodePtr) ent;
+    return(ent);
+}
+
+/**
+ * xmlAddDocEntity:
+ * @doc: the document
+ * @name: the entity name
+ * @type: the entity type XML_xxx_yyy_ENTITY
+ * @ExternalID: the entity external ID if available
+ * @SystemID: the entity system ID if available
+ * @content: the entity content
+ *
+ * Register a new entity for this document.
+ * The entity is created in the internal subset of the document and
+ * appended to the list of nodes of that subset.
+ *
+ * Returns a pointer to the entity or NULL in case of error
+ */
+xmlEntityPtr
+xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
+	        const xmlChar *ExternalID, const xmlChar *SystemID,
+	        const xmlChar *content) {
+    xmlDtdPtr subset;
+    xmlEntityPtr ent;
+
+    if (doc == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDocEntity: document is NULL !\n");
+        return(NULL);
+    }
+    subset = doc->intSubset;
+    if (subset == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDocEntity: document without internal subset !\n");
+        return(NULL);
+    }
+    ent = xmlAddEntity(subset, name, type, ExternalID, SystemID, content);
+    if (ent == NULL)
+        return(NULL);
+
+    /*
+     * Hook the entity at the end of the children of the subset
+     */
+    ent->parent = subset;
+    ent->doc = subset->doc;
+    if (subset->last != NULL) {
+        subset->last->next = (xmlNodePtr) ent;
+        ent->prev = subset->last;
+    } else {
+        subset->children = (xmlNodePtr) ent;
+    }
+    subset->last = (xmlNodePtr) ent;
+    return(ent);
+}
+
+/**
+ * xmlGetEntityFromTable:
+ * @table:  an entity table
+ * @name:  the entity name
+ *
+ * Look up @name in @table; the table is a hash table, so this is a
+ * plain hash lookup.
+ *
+ * Returns the entity registered under that name or NULL if there is none.
+ */
+static xmlEntityPtr
+xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
+    xmlEntityPtr found = (xmlEntityPtr) xmlHashLookup(table, name);
+
+    return(found);
+}
+
+/**
+ * xmlGetParameterEntity:
+ * @doc: the document referencing the entity
+ * @name: the entity name
+ *
+ * Do an entity lookup in the internal and external subsets and
+ * returns the corresponding parameter entity, if found.
+ *
+ * Returns A pointer to the entity structure or NULL if not found.
+ * The internal subset is searched first, the external subset is
+ * searched only when the internal one has no such entity.
+ */
+xmlEntityPtr
+xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
+    xmlDtdPtr subset;
+    xmlEntityPtr ent = NULL;
+
+    if (doc == NULL)
+        return(NULL);
+
+    subset = doc->intSubset;
+    if ((subset != NULL) && (subset->pentities != NULL))
+        ent = xmlGetEntityFromTable(
+                  (xmlEntitiesTablePtr) subset->pentities, name);
+    if (ent != NULL)
+        return(ent);
+
+    subset = doc->extSubset;
+    if ((subset != NULL) && (subset->pentities != NULL))
+        ent = xmlGetEntityFromTable(
+                  (xmlEntitiesTablePtr) subset->pentities, name);
+    return(ent);
+}
+
+/**
+ * xmlGetDtdEntity:
+ * @doc:  the document referencing the entity
+ * @name:  the entity name
+ *
+ * Look for a general entity in the external subset of the document.
+ * Note: the first argument is the document node, not the DTD node.
+ *
+ * Returns the entity, or NULL if the document is NULL, has no external
+ * subset with entities, or has no entity of that name there.
+ */
+xmlEntityPtr
+xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
+    xmlDtdPtr subset;
+
+    if (doc == NULL)
+        return(NULL);
+    subset = doc->extSubset;
+    if ((subset == NULL) || (subset->entities == NULL))
+        return(NULL);
+    return(xmlGetEntityFromTable((xmlEntitiesTablePtr) subset->entities,
+                                 name));
+}
+
+/**
+ * xmlGetDocEntity:
+ * @doc: the document referencing the entity
+ * @name: the entity name
+ *
+ * Do an entity lookup in the document entity hash table and
+ * returns the corresponding entity, otherwise a lookup is done
+ * in the predefined entities too.
+ *
+ * Returns A pointer to the entity structure or NULL if not found.
+ * The internal subset is searched first; the external subset is
+ * searched only for documents which are not standalone. A NULL @doc
+ * restricts the lookup to the predefined entities.
+ */
+xmlEntityPtr
+xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
+    xmlEntityPtr cur;
+    xmlEntitiesTablePtr table;
+
+    if (doc != NULL) {
+        if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
+            table = (xmlEntitiesTablePtr) doc->intSubset->entities;
+            cur = xmlGetEntityFromTable(table, name);
+            if (cur != NULL)
+                return(cur);
+        }
+        if (doc->standalone != 1) {
+            if ((doc->extSubset != NULL) &&
+                (doc->extSubset->entities != NULL)) {
+                table = (xmlEntitiesTablePtr) doc->extSubset->entities;
+                cur = xmlGetEntityFromTable(table, name);
+                if (cur != NULL)
+                    return(cur);
+            }
+        }
+    }
+    if (xmlPredefinedEntities == NULL)
+        xmlInitializePredefinedEntities();
+    table = xmlPredefinedEntities;
+    return(xmlGetEntityFromTable(table, name));
+}
+
+/*
+ * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
+ *                  | [#x10000-#x10FFFF]
+ * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+ *
+ * Note: the macro below is looser than the production, it accepts the
+ * surrogate blocks and the values above #x10FFFF.
+ */
+#define IS_CHAR(c)							\
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
+     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
+
+/*
+ * A buffer used for converting entities to their equivalent and back.
+ */
+static int static_buffer_size = 0;
+static xmlChar *static_buffer = NULL;
+
+/*
+ * growBuffer : double the size of the static conversion buffer.
+ *
+ * Returns 0 in case of success and -1 if the reallocation failed. In
+ * that case the old block is released instead of being leaked and the
+ * buffer is reset to its unallocated state (static_buffer is NULL).
+ */
+static int growBuffer(void) {
+    xmlChar *grown;
+    int size = static_buffer_size * 2;
+
+    /*
+     * Keep the old pointer until the reallocation is known to have
+     * succeeded, it is the only reference to the block.
+     */
+    grown = (xmlChar *) xmlRealloc(static_buffer, size * sizeof(xmlChar));
+    if (grown == NULL) {
+        xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
+        if (static_buffer != NULL)
+            xmlFree(static_buffer);
+        static_buffer = NULL;
+        static_buffer_size = 0;
+        return(-1);
+    }
+    static_buffer = grown;
+    static_buffer_size = size;
+    return(0);
+}
+
+
+/**
+ * xmlEncodeEntities:
+ * @doc: the document containing the string
+ * @input: A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts.
+ *
+ * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
+ * compatibility
+ *
+ * People must migrate their code to xmlEncodeEntitiesReentrant !
+ * This routine will issue a warning when encountered.
+ *
+ * Returns a pointer to a static buffer holding the string with the
+ * substitution done, or NULL in case of error. The buffer must not be
+ * freed by the caller and is overwritten by the next call.
+ */
+const xmlChar *
+xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
+    const xmlChar *cur = input;
+    xmlChar *out = static_buffer;
+    static int warning = 1;
+    int html = 0;
+
+
+    if (warning) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Deprecated API xmlEncodeEntities() used\n");
+        xmlGenericError(xmlGenericErrorContext,
+                " change code to use xmlEncodeEntitiesReentrant()\n");
+        warning = 0;
+    }
+
+    if (input == NULL) return(NULL);
+    if (doc != NULL)
+        html = (doc->type == XML_HTML_DOCUMENT_NODE);
+
+    if (static_buffer == NULL) {
+        static_buffer_size = 1000;
+        static_buffer = (xmlChar *)
+            xmlMalloc(static_buffer_size * sizeof(xmlChar));
+        if (static_buffer == NULL) {
+            xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
+            return(NULL);
+        }
+        out = static_buffer;
+    }
+    while (*cur != '\0') {
+        /* always keep 100 bytes free, one step writes 10 bytes at most */
+        if (out - static_buffer > static_buffer_size - 100) {
+            int indx = out - static_buffer;
+
+            /*
+             * growBuffer() reports its failure with a negative value,
+             * the buffer cannot be written to anymore in that case.
+             */
+            if (growBuffer() < 0)
+                return(NULL);
+            out = &static_buffer[indx];
+        }
+
+        /*
+         * By default one have to encode at least '<', '>', '"' and '&' !
+         */
+        if (*cur == '<') {
+            memcpy(out, "&lt;", 4);
+            out += 4;
+        } else if (*cur == '>') {
+            memcpy(out, "&gt;", 4);
+            out += 4;
+        } else if (*cur == '&') {
+            memcpy(out, "&amp;", 5);
+            out += 5;
+        } else if (*cur == '"') {
+            memcpy(out, "&quot;", 6);
+            out += 6;
+        } else if ((*cur == '\'') && (!html)) {
+            memcpy(out, "&apos;", 6);
+            out += 6;
+        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
+            (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
+            /*
+             * default case, just copy !
+             */
+            *out++ = *cur;
+#ifndef USE_UTF_8
+        } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
+            char buf[10], *ptr;
+
+            snprintf(buf, sizeof(buf), "&#%d;", *cur);
+            buf[sizeof(buf) - 1] = 0;
+            ptr = buf;
+            while (*ptr != 0) *out++ = *ptr++;
+#endif
+        } else if (IS_CHAR(*cur)) {
+            char buf[10], *ptr;
+
+            snprintf(buf, sizeof(buf), "&#%d;", *cur);
+            buf[sizeof(buf) - 1] = 0;
+            ptr = buf;
+            while (*ptr != 0) *out++ = *ptr++;
+        }
+        /* any other value is not a valid char and is silently dropped */
+        cur++;
+    }
+    *out++ = 0;
+    return(static_buffer);
+}
+
+/*
+ * Macro used to grow the current buffer.
+ * It expects the locals "buffer" and "buffer_size" in the calling
+ * function and makes that function return NULL if the reallocation
+ * fails, after releasing the old block which would be leaked otherwise.
+ */
+#define growBufferReentrant() {						\
+    xmlChar *grow_tmp;							\
+    buffer_size *= 2;							\
+    grow_tmp = (xmlChar *)						\
+		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
+    if (grow_tmp == NULL) {						\
+	xmlGenericError(xmlGenericErrorContext, "realloc failed\n");	\
+	xmlFree(buffer);						\
+	return(NULL);							\
+    }									\
+    buffer = grow_tmp;							\
+}
+
+
+/**
+ * xmlEncodeEntitiesReentrant:
+ * @doc:  the document containing the string
+ * @input:  A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts.
+ * Contrary to xmlEncodeEntities, this routine is reentrant, and result
+ * must be deallocated.
+ *
+ * The bytes above 0x7F are copied unchanged for HTML documents and for
+ * documents having an encoding. Otherwise the input is assumed to be
+ * UTF-8 and each sequence is replaced by a CharRef; a byte which does
+ * not start a well formed sequence is replaced by the CharRef of its own
+ * value and, when @doc is not NULL, the document encoding is set to
+ * ISO-8859-1. The single quote is not escaped.
+ *
+ * Returns A newly allocated string with the substitution done, or NULL
+ * if @input is NULL or in case of allocation failure.
+ */
+xmlChar *
+xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
+    const xmlChar *cur = input;
+    xmlChar *buffer = NULL;
+    xmlChar *out = NULL;
+    int buffer_size = 0;
+    int html = 0;
+
+    if (input == NULL) return(NULL);
+    if (doc != NULL)
+        html = (doc->type == XML_HTML_DOCUMENT_NODE);
+
+    /*
+     * allocate an translation buffer.
+     */
+    buffer_size = 1000;
+    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
+    if (buffer == NULL) {
+        xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
+        return(NULL);
+    }
+    out = buffer;
+
+    while (*cur != '\0') {
+        /* always keep 100 bytes free, one step writes 10 bytes at most */
+        if (out - buffer > buffer_size - 100) {
+            int indx = out - buffer;
+
+            growBufferReentrant();
+            out = &buffer[indx];
+        }
+
+        /*
+         * By default one have to encode at least '<', '>', '"' and '&' !
+         */
+        if (*cur == '<') {
+            memcpy(out, "&lt;", 4);
+            out += 4;
+        } else if (*cur == '>') {
+            memcpy(out, "&gt;", 4);
+            out += 4;
+        } else if (*cur == '&') {
+            memcpy(out, "&amp;", 5);
+            out += 5;
+        } else if (*cur == '"') {
+            memcpy(out, "&quot;", 6);
+            out += 6;
+        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
+            (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
+            /*
+             * default case, just copy ! This includes the single quote.
+             */
+            *out++ = *cur;
+        } else if (*cur >= 0x80) {
+            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
+                /* the byte is kept as is in the document encoding */
+                *out++ = *cur;
+            } else {
+                /*
+                 * We assume we have UTF-8 input.
+                 * The largest references, "&#2097151;" and "&#x1FFFFF;",
+                 * need 10 chars plus the terminating 0.
+                 */
+                char buf[11], *ptr;
+                int val = 0, l = 1, i;
+
+                if (*cur < 0xC0) {
+                    xmlGenericError(xmlGenericErrorContext,
+                            "xmlEncodeEntitiesReentrant : input not UTF-8\n");
+                    if (doc != NULL)
+                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
+                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
+                    buf[sizeof(buf) - 1] = 0;
+                    ptr = buf;
+                    while (*ptr != 0) *out++ = *ptr++;
+                    cur++;
+                    continue;
+                } else if (*cur < 0xE0) {
+                    val = (cur[0]) & 0x1F;
+                    l = 2;
+                } else if (*cur < 0xF0) {
+                    val = (cur[0]) & 0x0F;
+                    l = 3;
+                } else if (*cur < 0xF8) {
+                    val = (cur[0]) & 0x07;
+                    l = 4;
+                }
+                /*
+                 * Every trailing byte must be a continuation byte. This
+                 * also stops at the end of the string, a truncated
+                 * sequence must not make cur step over the final 0.
+                 */
+                for (i = 1;i < l;i++) {
+                    if ((cur[i] & 0xC0) != 0x80) {
+                        l = 1;
+                        break;
+                    }
+                    val <<= 6;
+                    val |= (cur[i]) & 0x3F;
+                }
+                if ((l == 1) || (!IS_CHAR(val))) {
+                    xmlGenericError(xmlGenericErrorContext,
+                        "xmlEncodeEntitiesReentrant : char out of range\n");
+                    if (doc != NULL)
+                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
+                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
+                    buf[sizeof(buf) - 1] = 0;
+                    ptr = buf;
+                    while (*ptr != 0) *out++ = *ptr++;
+                    cur++;
+                    continue;
+                }
+                /*
+                 * We could do multiple things here. Just save as a char ref
+                 */
+                if (html)
+                    snprintf(buf, sizeof(buf), "&#%d;", val);
+                else
+                    snprintf(buf, sizeof(buf), "&#x%X;", val);
+                buf[sizeof(buf) - 1] = 0;
+                ptr = buf;
+                while (*ptr != 0) *out++ = *ptr++;
+                cur += l;
+                continue;
+            }
+        } else if (IS_CHAR(*cur)) {
+            char buf[10], *ptr;
+
+            snprintf(buf, sizeof(buf), "&#%d;", *cur);
+            buf[sizeof(buf) - 1] = 0;
+            ptr = buf;
+            while (*ptr != 0) *out++ = *ptr++;
+        }
+        /* any other value is not a valid char and is silently dropped */
+        cur++;
+    }
+    *out++ = 0;
+    return(buffer);
+}
+
+/**
+ * xmlEncodeSpecialChars:
+ * @doc: the document containing the string
+ * @input: A string to convert to XML.
+ * + * Do a global encoding of a string, replacing the predefined entities + * this routine is reentrant, and result must be deallocated. + * + * Returns A newly allocated string with the substitution done. + */ +xmlChar * +xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) { + const xmlChar *cur = input; + xmlChar *buffer = NULL; + xmlChar *out = NULL; + int buffer_size = 0; + int html = 0; + + if (input == NULL) return(NULL); + if (doc != NULL) + html = (doc->type == XML_HTML_DOCUMENT_NODE); + + /* + * allocate an translation buffer. + */ + buffer_size = 1000; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed\n"); + return(NULL); + } + out = buffer; + + while (*cur != '\0') { + if (out - buffer > buffer_size - 10) { + int indx = out - buffer; + + growBufferReentrant(); + out = &buffer[indx]; + } + + /* + * By default one have to encode at least '<', '>', '"' and '&' ! + */ + if (*cur == '<') { + *out++ = '&'; + *out++ = 'l'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '>') { + *out++ = '&'; + *out++ = 'g'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '&') { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'm'; + *out++ = 'p'; + *out++ = ';'; + } else if (*cur == '"') { + *out++ = '&'; + *out++ = 'q'; + *out++ = 'u'; + *out++ = 'o'; + *out++ = 't'; + *out++ = ';'; + } else { + /* + * Works because on UTF-8, all extended sequences cannot + * result in bytes in the ASCII range. + */ + *out++ = *cur; + } + cur++; + } + *out++ = 0; + return(buffer); +} + +/** + * xmlCreateEntitiesTable: + * + * create and initialize an empty entities hash table. + * + * Returns the xmlEntitiesTablePtr just created or NULL in case of error. + */ +xmlEntitiesTablePtr +xmlCreateEntitiesTable(void) { + return((xmlEntitiesTablePtr) xmlHashCreate(0)); +} + +/** + * xmlFreeEntitiesTable: + * @table: An entity table + * + * Deallocate the memory used by an entities hash table. 
+ */ +void +xmlFreeEntitiesTable(xmlEntitiesTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity); +} + +/** + * xmlCopyEntity: + * @ent: An entity + * + * Build a copy of an entity + * + * Returns the new xmlEntitiesPtr or NULL in case of error. + */ +static xmlEntityPtr +xmlCopyEntity(xmlEntityPtr ent) { + xmlEntityPtr cur; + + cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyEntity: out of memory !\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlEntity)); + cur->type = XML_ENTITY_DECL; + + cur->etype = ent->etype; + if (ent->name != NULL) + cur->name = xmlStrdup(ent->name); + if (ent->ExternalID != NULL) + cur->ExternalID = xmlStrdup(ent->ExternalID); + if (ent->SystemID != NULL) + cur->SystemID = xmlStrdup(ent->SystemID); + if (ent->content != NULL) + cur->content = xmlStrdup(ent->content); + if (ent->orig != NULL) + cur->orig = xmlStrdup(ent->orig); + if (ent->URI != NULL) + cur->URI = xmlStrdup(ent->URI); + return(cur); +} + +/** + * xmlCopyEntitiesTable: + * @table: An entity table + * + * Build a copy of an entity table. + * + * Returns the new xmlEntitiesTablePtr or NULL in case of error. + */ +xmlEntitiesTablePtr +xmlCopyEntitiesTable(xmlEntitiesTablePtr table) { + return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity)); +} + +/** + * xmlDumpEntityDecl: + * @buf: An XML buffer. 
+ * @ent: An entity table + * + * This will dump the content of the entity table as an XML DTD definition + */ +void +xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) { + switch (ent->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + xmlBufferWriteChar(buf, "<!ENTITY "); + xmlBufferWriteCHAR(buf, ent->name); + xmlBufferWriteChar(buf, " "); + if (ent->orig != NULL) + xmlBufferWriteQuotedString(buf, ent->orig); + else + xmlBufferWriteQuotedString(buf, ent->content); + xmlBufferWriteChar(buf, ">\n"); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + xmlBufferWriteChar(buf, "<!ENTITY "); + xmlBufferWriteCHAR(buf, ent->name); + if (ent->ExternalID != NULL) { + xmlBufferWriteChar(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf, ent->ExternalID); + xmlBufferWriteChar(buf, " "); + xmlBufferWriteQuotedString(buf, ent->SystemID); + } else { + xmlBufferWriteChar(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf, ent->SystemID); + } + xmlBufferWriteChar(buf, ">\n"); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + xmlBufferWriteChar(buf, "<!ENTITY "); + xmlBufferWriteCHAR(buf, ent->name); + if (ent->ExternalID != NULL) { + xmlBufferWriteChar(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf, ent->ExternalID); + xmlBufferWriteChar(buf, " "); + xmlBufferWriteQuotedString(buf, ent->SystemID); + } else { + xmlBufferWriteChar(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf, ent->SystemID); + } + if (ent->content != NULL) { /* Should be true ! 
*/ + xmlBufferWriteChar(buf, " NDATA "); + if (ent->orig != NULL) + xmlBufferWriteCHAR(buf, ent->orig); + else + xmlBufferWriteCHAR(buf, ent->content); + } + xmlBufferWriteChar(buf, ">\n"); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + xmlBufferWriteChar(buf, "<!ENTITY % "); + xmlBufferWriteCHAR(buf, ent->name); + xmlBufferWriteChar(buf, " "); + if (ent->orig == NULL) + xmlBufferWriteQuotedString(buf, ent->content); + else + xmlBufferWriteQuotedString(buf, ent->orig); + xmlBufferWriteChar(buf, ">\n"); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + xmlBufferWriteChar(buf, "<!ENTITY % "); + xmlBufferWriteCHAR(buf, ent->name); + if (ent->ExternalID != NULL) { + xmlBufferWriteChar(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf, ent->ExternalID); + xmlBufferWriteChar(buf, " "); + xmlBufferWriteQuotedString(buf, ent->SystemID); + } else { + xmlBufferWriteChar(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf, ent->SystemID); + } + xmlBufferWriteChar(buf, ">\n"); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpEntitiesDecl: internal: unknown type %d\n", + ent->etype); + } +} + +/** + * xmlDumpEntitiesTable: + * @buf: An XML buffer. + * @table: An entity table + * + * This will dump the content of the entity table as an XML DTD definition + */ +void +xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) { + xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf); +} diff --git a/bundle/libxml/error.c b/bundle/libxml/error.c new file mode 100644 index 0000000000..d6266af051 --- /dev/null +++ b/bundle/libxml/error.c @@ -0,0 +1,420 @@ +/* + * error.c: module displaying/handling XML parser errors + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard <daniel@veillard.com> + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <stdarg.h> +#include <libxml/parser.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlmemory.h> +#include <libxml/globals.h> + +void xmlGenericErrorDefaultFunc (void *ctx ATTRIBUTE_UNUSED, + const char *msg, + ...); + +#define XML_GET_VAR_STR(msg, str) { \ + int size; \ + int chars; \ + char *larger; \ + va_list ap; \ + \ + str = (char *) xmlMalloc(150); \ + if (str == NULL) \ + return; \ + \ + size = 150; \ + \ + while (1) { \ + va_start(ap, msg); \ + chars = vsnprintf(str, size, msg, ap); \ + va_end(ap); \ + if ((chars > -1) && (chars < size)) \ + break; \ + if (chars > -1) \ + size += chars + 1; \ + else \ + size += 100; \ + if ((larger = (char *) xmlRealloc(str, size)) == NULL) {\ + xmlFree(str); \ + return; \ + } \ + str = larger; \ + } \ +} + +/************************************************************************ + * * + * Handling of out of context errors * + * * + ************************************************************************/ + +/** + * xmlGenericErrorDefaultFunc: + * @ctx: an error context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Default handler for out of context error messages. + */ +void +xmlGenericErrorDefaultFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) 
{ + va_list args; + + if (xmlGenericErrorContext == NULL) + xmlGenericErrorContext = (void *) stderr; + + va_start(args, msg); + vfprintf((FILE *)xmlGenericErrorContext, msg, args); + va_end(args); +} + +/** + * initGenericErrorDefaultFunc: + * @handler: the handler + * + * Set or reset (if NULL) the default handler for generic errors + */ +void +initGenericErrorDefaultFunc(xmlGenericErrorFunc * handler) +{ + if (handler == NULL) + xmlGenericError = xmlGenericErrorDefaultFunc; + else + (*handler) = xmlGenericErrorDefaultFunc; +} + +/** + * xmlSetGenericErrorFunc: + * @ctx: the new error handling context + * @handler: the new handler function + * + * Function to reset the handler and the error context for out of + * context error messages. + * This simply means that @handler will be called for subsequent + * error messages while not parsing nor validating. And @ctx will + * be passed as first argument to @handler + * One can simply force messages to be emitted to another FILE * than + * stderr by setting @ctx to this file handle and @handler to NULL. 
+ */ +void +xmlSetGenericErrorFunc(void *ctx, xmlGenericErrorFunc handler) { + xmlGenericErrorContext = ctx; + if (handler != NULL) + xmlGenericError = handler; + else + xmlGenericError = xmlGenericErrorDefaultFunc; +} + +/************************************************************************ + * * + * Handling of parsing errors * + * * + ************************************************************************/ + +/** + * xmlParserPrintFileInfo: + * @input: an xmlParserInputPtr input + * + * Displays the associated file and line informations for the current input + */ + +void +xmlParserPrintFileInfo(xmlParserInputPtr input) { + if (input != NULL) { + if (input->filename) + xmlGenericError(xmlGenericErrorContext, + "%s:%d: ", input->filename, + input->line); + else + xmlGenericError(xmlGenericErrorContext, + "Entity: line %d: ", input->line); + } +} + +/** + * xmlParserPrintFileContext: + * @input: an xmlParserInputPtr input + * + * Displays current context within the input content for error tracking + */ + +void +xmlParserPrintFileContext(xmlParserInputPtr input) { + const xmlChar *cur, *base; + int n; + xmlChar content[81]; + xmlChar *ctnt; + + if (input == NULL) return; + cur = input->cur; + base = input->base; + /* skip backwards over any end-of-lines */ + while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) { + cur--; + } + n = 0; + /* search backwards for beginning-of-line maximum 80 characters */ + while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r')) + cur--; + if ((*cur == '\n') || (*cur == '\r')) cur++; + /* search forward for end-of-line maximum 80 characters */ + n = 0; + ctnt = content; + while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) { + *ctnt++ = *cur++; + n++; + } + *ctnt = 0; + xmlGenericError(xmlGenericErrorContext,"%s\n", content); + /* create blank line with problem pointer */ + cur = input->cur; + while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) { + cur--; + } + n = 0; + ctnt = content; + 
while ((n++ < 79) && (cur > base) && (*cur != '\n') && (*cur != '\r')) { + *ctnt++ = ' '; + cur--; + } + if (ctnt > content) { + *(--ctnt) = '^'; + *(++ctnt) = 0; + } else { + *ctnt = '^'; + *(++ctnt) = 0; + } + xmlGenericError(xmlGenericErrorContext,"%s\n", content); +} + +#if 0 +/** + * xmlGetVarStr: + * @msg: the message format + * @args: a va_list argument list + * + * SGS contribution + * Get an arbitrary-sized string for an error argument + * The caller must free() the returned string + */ +static char * +xmlGetVarStr(const char * msg, va_list args) { + int size; + int length; + int chars, left; + char *str, *larger; + va_list ap; + + str = (char *) xmlMalloc(150); + if (str == NULL) + return(NULL); + + size = 150; + length = 0; + + while (1) { + left = size - length; + /* Try to print in the allocated space. */ + va_start(msg, ap); + chars = vsnprintf(str + length, left, msg, ap); + va_end(ap); + /* If that worked, we're done. */ + if ((chars > -1) && (chars < left )) + break; + /* Else try again with more space. */ + if (chars > -1) /* glibc 2.1 */ + size += chars + 1; /* precisely what is needed */ + else /* glibc 2.0 */ + size += 100; + if ((larger = (char *) xmlRealloc(str, size)) == NULL) { + xmlFree(str); + return(NULL); + } + str = larger; + } + return(str); +} +#endif + +/** + * xmlParserError: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format an error messages, gives file, line, position and + * extra parameters. + */ +void +xmlParserError(void *ctx, const char *msg, ...) 
+{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserInputPtr input = NULL; + xmlParserInputPtr cur = NULL; + char * str; + + if (ctxt != NULL) { + input = ctxt->input; + if ((input != NULL) && (input->filename == NULL) && + (ctxt->inputNr > 1)) { + cur = input; + input = ctxt->inputTab[ctxt->inputNr - 2]; + } + xmlParserPrintFileInfo(input); + } + + xmlGenericError(xmlGenericErrorContext, "error: "); + XML_GET_VAR_STR(msg, str); + xmlGenericError(xmlGenericErrorContext, "%s", str); + if (str != NULL) + xmlFree(str); + + if (ctxt != NULL) { + xmlParserPrintFileContext(input); + if (cur != NULL) { + xmlParserPrintFileInfo(cur); + xmlGenericError(xmlGenericErrorContext, "\n"); + xmlParserPrintFileContext(cur); + } + } +} + +/** + * xmlParserWarning: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a warning messages, gives file, line, position and + * extra parameters. + */ +void +xmlParserWarning(void *ctx, const char *msg, ...) 
+{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserInputPtr input = NULL; + xmlParserInputPtr cur = NULL; + char * str; + + if (ctxt != NULL) { + input = ctxt->input; + if ((input != NULL) && (input->filename == NULL) && + (ctxt->inputNr > 1)) { + cur = input; + input = ctxt->inputTab[ctxt->inputNr - 2]; + } + xmlParserPrintFileInfo(input); + } + + xmlGenericError(xmlGenericErrorContext, "warning: "); + XML_GET_VAR_STR(msg, str); + xmlGenericError(xmlGenericErrorContext, "%s", str); + if (str != NULL) + xmlFree(str); + + if (ctxt != NULL) { + xmlParserPrintFileContext(input); + if (cur != NULL) { + xmlParserPrintFileInfo(cur); + xmlGenericError(xmlGenericErrorContext, "\n"); + xmlParserPrintFileContext(cur); + } + } +} + +/************************************************************************ + * * + * Handling of validation errors * + * * + ************************************************************************/ + +/** + * xmlParserValidityError: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format an validity error messages, gives file, + * line, position and extra parameters. + */ +void +xmlParserValidityError(void *ctx, const char *msg, ...) 
+{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserInputPtr input = NULL; + char * str; + int len = xmlStrlen((const xmlChar *) msg); + static int had_info = 0; + int need_context = 0; + + if ((len > 1) && (msg[len - 2] != ':')) { + if (ctxt != NULL) { + input = ctxt->input; + if ((input->filename == NULL) && (ctxt->inputNr > 1)) + input = ctxt->inputTab[ctxt->inputNr - 2]; + + if (had_info == 0) { + xmlParserPrintFileInfo(input); + } + } + xmlGenericError(xmlGenericErrorContext, "validity error: "); + need_context = 1; + had_info = 0; + } else { + had_info = 1; + } + + XML_GET_VAR_STR(msg, str); + xmlGenericError(xmlGenericErrorContext, "%s", str); + if (str != NULL) + xmlFree(str); + + if ((ctxt != NULL) && (input != NULL)) { + xmlParserPrintFileContext(input); + } +} + +/** + * xmlParserValidityWarning: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a validity warning messages, gives file, line, + * position and extra parameters. + */ +void +xmlParserValidityWarning(void *ctx, const char *msg, ...) 
+{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserInputPtr input = NULL; + char * str; + int len = xmlStrlen((const xmlChar *) msg); + + if ((ctxt != NULL) && (len != 0) && (msg[len - 1] != ':')) { + input = ctxt->input; + if ((input->filename == NULL) && (ctxt->inputNr > 1)) + input = ctxt->inputTab[ctxt->inputNr - 2]; + + xmlParserPrintFileInfo(input); + } + + xmlGenericError(xmlGenericErrorContext, "validity warning: "); + XML_GET_VAR_STR(msg, str); + xmlGenericError(xmlGenericErrorContext, "%s", str); + if (str != NULL) + xmlFree(str); + + if (ctxt != NULL) { + xmlParserPrintFileContext(input); + } +} + + diff --git a/bundle/libxml/globals.c b/bundle/libxml/globals.c new file mode 100644 index 0000000000..72865c323a --- /dev/null +++ b/bundle/libxml/globals.c @@ -0,0 +1,653 @@ +/* + * globals.c: definition and handling of the set of global variables + * of the library + * + * The bottom of this file is automatically generated by build_glob.py + * based on the description file global.data + * + * See Copyright for the status of this software. 
+ * + * Gary Pennington <Gary.Pennington@uk.sun.com> + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#include <string.h> + +#include <libxml/globals.h> +#include <libxml/xmlmemory.h> + +/* #define DEBUG_GLOBALS */ + +/* + * Helpful Macro + */ +#ifdef LIBXML_THREAD_ENABLED +#define IS_MAIN_THREAD (xmlIsMainThread()) +#else +#define IS_MAIN_THREAD 1 +#endif + +/************************************************************************ + * * + * All the user accessible global variables of the library * + * * + ************************************************************************/ + +/* + * Memory allocation routines + */ +#if defined(DEBUG_MEMORY_LOCATION) || defined(DEBUG_MEMORY) +extern void xmlMemFree(void *ptr); +extern void * xmlMemMalloc(size_t size); +extern void * xmlMemRealloc(void *ptr,size_t size); +extern char * xmlMemoryStrdup(const char *str); + +xmlFreeFunc xmlFree = (xmlFreeFunc) xmlMemFree; +xmlMallocFunc xmlMalloc = (xmlMallocFunc) xmlMemMalloc; +xmlReallocFunc xmlRealloc = (xmlReallocFunc) xmlMemRealloc; +xmlStrdupFunc xmlMemStrdup = (xmlStrdupFunc) xmlMemoryStrdup; +#else +/** + * xmlFree: + * @mem: an already allocated block of memory + * + * The variable holding the libxml free() implementation + */ +xmlFreeFunc xmlFree = (xmlFreeFunc) free; +/** + * xmlMalloc: + * @size: the size requested in bytes + * + * The variable holding the libxml malloc() implementation + * + * Returns a pointer to the newly allocated block or NULL in case of error + */ +xmlMallocFunc xmlMalloc = (xmlMallocFunc) malloc; +/** + * xmlRealloc: + * @mem: an already allocated block of memory + * @size: the new size requested in bytes + * + * The variable holding the libxml realloc() implementation + * + * Returns a pointer to the newly reallocated block or NULL in case of error + */ +xmlReallocFunc xmlRealloc = (xmlReallocFunc) realloc; +/** + * xmlMemStrdup: + * @str: a zero terminated string + * + 
* The variable holding the libxml strdup() implementation + * + * Returns the copy of the string or NULL in case of error + */ +xmlStrdupFunc xmlMemStrdup = (xmlStrdupFunc) xmlStrdup; +#endif + +#include <libxml/threads.h> +#include <libxml/globals.h> +#include <libxml/SAX.h> + +#undef docbDefaultSAXHandler +#undef htmlDefaultSAXHandler +#undef oldXMLWDcompatibility +#undef xmlBufferAllocScheme +#undef xmlDefaultBufferSize +#undef xmlDefaultSAXHandler +#undef xmlDefaultSAXLocator +#undef xmlDoValidityCheckingDefaultValue +#undef xmlGenericError +#undef xmlGenericErrorContext +#undef xmlGetWarningsDefaultValue +#undef xmlIndentTreeOutput +#undef xmlTreeIndentString +#undef xmlKeepBlanksDefaultValue +#undef xmlLineNumbersDefaultValue +#undef xmlLoadExtDtdDefaultValue +#undef xmlParserDebugEntities +#undef xmlParserVersion +#undef xmlPedanticParserDefaultValue +#undef xmlSaveNoEmptyTags +#undef xmlSubstituteEntitiesDefaultValue + +#undef xmlFree +#undef xmlMalloc +#undef xmlMemStrdup +#undef xmlRealloc + +/** + * xmlParserVersion: + * + * Constant string describing the internal version of the library + */ +const char *xmlParserVersion = LIBXML_VERSION_STRING; + +/** + * xmlBufferAllocScheme: + * + * Global setting, default allocation policy for buffers, default is + * XML_BUFFER_ALLOC_EXACT + */ +xmlBufferAllocationScheme xmlBufferAllocScheme = XML_BUFFER_ALLOC_EXACT; +/** + * xmlDefaultBufferSize: + * + * Global setting, default buffer size. Default value is BASE_BUFFER_SIZE + */ +int xmlDefaultBufferSize = BASE_BUFFER_SIZE; + +/* + * Parser defaults + */ + +/** + * oldXMLWDcompatibility: + * + * Global setting, DEPRECATED. + */ +int oldXMLWDcompatibility = 0; /* DEPRECATED */ +/** + * xmlParserDebugEntities: + * + * Global setting, asking the parser to print out debugging informations. + * while handling entities. 
+ * Disabled by default + */ +int xmlParserDebugEntities = 0; +/** + * xmlDoValidityCheckingDefaultValue: + * + * Global setting, indicate that the parser should work in validating mode. + * Disabled by default. + */ +int xmlDoValidityCheckingDefaultValue = 0; +/** + * xmlGetWarningsDefaultValue: + * + * Global setting, indicate that the parser should provide warnings. + * Activated by default. + */ +int xmlGetWarningsDefaultValue = 1; +/** + * xmlLoadExtDtdDefaultValue: + * + * Global setting, indicate that the parser should load DTD while not + * validating. + * Disabled by default. + */ +int xmlLoadExtDtdDefaultValue = 0; +/** + * xmlPedanticParserDefaultValue: + * + * Global setting, indicate that the parser should be pedantic. + * Disabled by default. + */ +int xmlPedanticParserDefaultValue = 0; +/** + * xmlLineNumbersDefaultValue: + * + * Global setting, indicate that the parser should store the line number + * in the content field of elements in the DOM tree. + * Disabled by default since this may not be safe for old classes of + * applications. + */ +int xmlLineNumbersDefaultValue = 0; +/** + * xmlKeepBlanksDefaultValue: + * + * Global setting, indicate that the parser should keep all blanks + * nodes found in the content + * Activated by default, this is actually needed to have the parser + * conformant to the XML Recommendation, however the option is kept + * for some applications since this was libxml1 default behaviour. + */ +int xmlKeepBlanksDefaultValue = 1; +/** + * xmlSubstituteEntitiesDefaultValue: + * + * Global setting, indicate that the parser should not generate entity + * references but replace them with the actual content of the entity + * Disabled by default, this should be activated when using XPath since + * the XPath data model requires entities replacement and the XPath + * engine does not handle entities references transparently. 
+ */ +int xmlSubstituteEntitiesDefaultValue = 0; + +/* + * Error handling + */ + +/* xmlGenericErrorFunc xmlGenericError = xmlGenericErrorDefaultFunc; */ +/* Must initialize xmlGenericError in xmlInitParser */ +void xmlGenericErrorDefaultFunc (void *ctx ATTRIBUTE_UNUSED, + const char *msg, + ...); +/** + * xmlGenericError: + * + * Global setting: function used for generic error callbacks + */ +xmlGenericErrorFunc xmlGenericError = xmlGenericErrorDefaultFunc; +/** + * xmlGenericErrorContext: + * + * Global setting passed to generic error callbacks + */ +void *xmlGenericErrorContext = NULL; + +/* + * output defaults + */ +/** + * xmlIndentTreeOutput: + * + * Global setting, asking the serializer to indent the output tree by default + * Enabled by default + */ +int xmlIndentTreeOutput = 1; + +/** + * xmlTreeIndentString: + * + * The string used to do one-level indent. By default is equal to " " (two spaces) + */ +const char *xmlTreeIndentString = " "; + +/** + * xmlSaveNoEmptyTags: + * + * Global setting, asking the serializer to not output empty tags + * as <empty/> but <empty></empty>. Those two forms are indistinguishable + * once parsed. 
+ * Disabled by default + */ +int xmlSaveNoEmptyTags = 0; + +/** + * xmlDefaultSAXHandler: + * + * Default handler for XML, builds the DOM tree + */ +xmlSAXHandler xmlDefaultSAXHandler = { + internalSubset, + isStandalone, + hasInternalSubset, + hasExternalSubset, + resolveEntity, + getEntity, + entityDecl, + notationDecl, + attributeDecl, + elementDecl, + unparsedEntityDecl, + setDocumentLocator, + startDocument, + endDocument, + startElement, + endElement, + reference, + characters, + characters, + processingInstruction, + comment, + xmlParserWarning, + xmlParserError, + xmlParserError, + getParameterEntity, + cdataBlock, + externalSubset, + 0 +}; + +/** + * xmlDefaultSAXLocator: + * + * The default SAX Locator + * { getPublicId, getSystemId, getLineNumber, getColumnNumber} + */ +xmlSAXLocator xmlDefaultSAXLocator = { + getPublicId, getSystemId, getLineNumber, getColumnNumber +}; + +#ifdef LIBXML_HTML_ENABLED +/** + * htmlDefaultSAXHandler: + * + * Default handler for HTML, builds the DOM tree + */ +xmlSAXHandler htmlDefaultSAXHandler = { + internalSubset, + NULL, + NULL, + NULL, + NULL, + getEntity, + NULL, + NULL, + NULL, + NULL, + NULL, + setDocumentLocator, + startDocument, + endDocument, + startElement, + endElement, + NULL, + characters, + ignorableWhitespace, + NULL, + comment, + xmlParserWarning, + xmlParserError, + xmlParserError, + getParameterEntity, + cdataBlock, + NULL, + 0 +}; +#endif /* LIBXML_HTML_ENABLED */ + +#ifdef LIBXML_DOCB_ENABLED +/** + * docbDefaultSAXHandler: + * + * Default handler for SGML DocBook, builds the DOM tree + */ +xmlSAXHandler docbDefaultSAXHandler = { + internalSubset, + isStandalone, + hasInternalSubset, + hasExternalSubset, + resolveEntity, + getEntity, + entityDecl, + NULL, + NULL, + NULL, + NULL, + setDocumentLocator, + startDocument, + endDocument, + startElement, + endElement, + reference, + characters, + ignorableWhitespace, + NULL, + comment, + xmlParserWarning, + xmlParserError, + xmlParserError, + 
getParameterEntity, + NULL, + NULL, + 0 +}; +#endif /* LIBXML_DOCB_ENABLED */ + +/** + * xmlInitializeGlobalState: + * @gs: a pointer to a newly allocated global state + * + * xmlInitializeGlobalState() initialize a global state with all the + * default values of the library. + */ +void +xmlInitializeGlobalState(xmlGlobalStatePtr gs) +{ +#ifdef DEBUG_GLOBALS + fprintf(stderr, "Initializing globals at %lu for thread %d\n", + (unsigned long) gs, xmlGetThreadId()); +#endif + + /* + * Perform initialization as required by libxml + */ + +#ifdef LIBXML_DOCB_ENABLED + initdocbDefaultSAXHandler(&gs->docbDefaultSAXHandler); +#endif +#ifdef LIBXML_HTML_ENABLED + inithtmlDefaultSAXHandler(&gs->htmlDefaultSAXHandler); +#endif + initGenericErrorDefaultFunc(&gs->xmlGenericError); + + gs->oldXMLWDcompatibility = 0; + gs->xmlBufferAllocScheme = XML_BUFFER_ALLOC_EXACT; + gs->xmlDefaultBufferSize = BASE_BUFFER_SIZE; + initxmlDefaultSAXHandler(&gs->xmlDefaultSAXHandler, 1); + gs->xmlDefaultSAXLocator.getPublicId = getPublicId; + gs->xmlDefaultSAXLocator.getSystemId = getSystemId; + gs->xmlDefaultSAXLocator.getLineNumber = getLineNumber; + gs->xmlDefaultSAXLocator.getColumnNumber = getColumnNumber; + gs->xmlDoValidityCheckingDefaultValue = 0; +#if defined(DEBUG_MEMORY_LOCATION) | defined(DEBUG_MEMORY) + gs->xmlFree = (xmlFreeFunc) xmlMemFree; + gs->xmlMalloc = (xmlMallocFunc) xmlMemMalloc; + gs->xmlRealloc = (xmlReallocFunc) xmlMemRealloc; + gs->xmlMemStrdup = (xmlStrdupFunc) xmlMemoryStrdup; +#else + gs->xmlFree = (xmlFreeFunc) free; + gs->xmlMalloc = (xmlMallocFunc) malloc; + gs->xmlRealloc = (xmlReallocFunc) realloc; + gs->xmlMemStrdup = (xmlStrdupFunc) xmlStrdup; +#endif + gs->xmlGenericErrorContext = NULL; + gs->xmlGetWarningsDefaultValue = 1; + gs->xmlIndentTreeOutput = 1; + gs->xmlTreeIndentString = " "; + gs->xmlKeepBlanksDefaultValue = 1; + gs->xmlLineNumbersDefaultValue = 0; + gs->xmlLoadExtDtdDefaultValue = 0; + gs->xmlParserDebugEntities = 0; + gs->xmlParserVersion = 
LIBXML_VERSION_STRING; + gs->xmlPedanticParserDefaultValue = 0; + gs->xmlSaveNoEmptyTags = 0; + gs->xmlSubstituteEntitiesDefaultValue = 0; +} + +#ifdef LIBXML_DOCB_ENABLED +#undef docbDefaultSAXHandler +xmlSAXHandler * +__docbDefaultSAXHandler(void) { + if (IS_MAIN_THREAD) + return (&docbDefaultSAXHandler); + else + return (&xmlGetGlobalState()->docbDefaultSAXHandler); +} +#endif + +#ifdef LIBXML_HTML_ENABLED +#undef htmlDefaultSAXHandler +xmlSAXHandler * +__htmlDefaultSAXHandler(void) { + if (IS_MAIN_THREAD) + return (&htmlDefaultSAXHandler); + else + return (&xmlGetGlobalState()->htmlDefaultSAXHandler); +} +#endif + +/* + * Everything starting from the line below is + * Automatically generated by build_glob.py. + * Do not modify the previous line. + */ + + +#undef oldXMLWDcompatibility +int * +__oldXMLWDcompatibility(void) { + if (IS_MAIN_THREAD) + return (&oldXMLWDcompatibility); + else + return (&xmlGetGlobalState()->oldXMLWDcompatibility); +} + +#undef xmlBufferAllocScheme +xmlBufferAllocationScheme * +__xmlBufferAllocScheme(void) { + if (IS_MAIN_THREAD) + return (&xmlBufferAllocScheme); + else + return (&xmlGetGlobalState()->xmlBufferAllocScheme); +} + +#undef xmlDefaultBufferSize +int * +__xmlDefaultBufferSize(void) { + if (IS_MAIN_THREAD) + return (&xmlDefaultBufferSize); + else + return (&xmlGetGlobalState()->xmlDefaultBufferSize); +} + +#undef xmlDefaultSAXHandler +xmlSAXHandler * +__xmlDefaultSAXHandler(void) { + if (IS_MAIN_THREAD) + return (&xmlDefaultSAXHandler); + else + return (&xmlGetGlobalState()->xmlDefaultSAXHandler); +} + +#undef xmlDefaultSAXLocator +xmlSAXLocator * +__xmlDefaultSAXLocator(void) { + if (IS_MAIN_THREAD) + return (&xmlDefaultSAXLocator); + else + return (&xmlGetGlobalState()->xmlDefaultSAXLocator); +} + +#undef xmlDoValidityCheckingDefaultValue +int * +__xmlDoValidityCheckingDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlDoValidityCheckingDefaultValue); + else + return 
(&xmlGetGlobalState()->xmlDoValidityCheckingDefaultValue); +} + +#undef xmlGenericError +xmlGenericErrorFunc * +__xmlGenericError(void) { + if (IS_MAIN_THREAD) + return (&xmlGenericError); + else + return (&xmlGetGlobalState()->xmlGenericError); +} + +#undef xmlGenericErrorContext +void * * +__xmlGenericErrorContext(void) { + if (IS_MAIN_THREAD) + return (&xmlGenericErrorContext); + else + return (&xmlGetGlobalState()->xmlGenericErrorContext); +} + +#undef xmlGetWarningsDefaultValue +int * +__xmlGetWarningsDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlGetWarningsDefaultValue); + else + return (&xmlGetGlobalState()->xmlGetWarningsDefaultValue); +} + +#undef xmlIndentTreeOutput +int * +__xmlIndentTreeOutput(void) { + if (IS_MAIN_THREAD) + return (&xmlIndentTreeOutput); + else + return (&xmlGetGlobalState()->xmlIndentTreeOutput); +} + +#undef xmlTreeIndentString +const char * * +__xmlTreeIndentString(void) { + if (IS_MAIN_THREAD) + return (&xmlTreeIndentString); + else + return (&xmlGetGlobalState()->xmlTreeIndentString); +} + +#undef xmlKeepBlanksDefaultValue +int * +__xmlKeepBlanksDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlKeepBlanksDefaultValue); + else + return (&xmlGetGlobalState()->xmlKeepBlanksDefaultValue); +} + +#undef xmlLineNumbersDefaultValue +int * +__xmlLineNumbersDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlLineNumbersDefaultValue); + else + return (&xmlGetGlobalState()->xmlLineNumbersDefaultValue); +} + +#undef xmlLoadExtDtdDefaultValue +int * +__xmlLoadExtDtdDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlLoadExtDtdDefaultValue); + else + return (&xmlGetGlobalState()->xmlLoadExtDtdDefaultValue); +} + +#undef xmlParserDebugEntities +int * +__xmlParserDebugEntities(void) { + if (IS_MAIN_THREAD) + return (&xmlParserDebugEntities); + else + return (&xmlGetGlobalState()->xmlParserDebugEntities); +} + +#undef xmlParserVersion +const char * * +__xmlParserVersion(void) { + if (IS_MAIN_THREAD) + return 
(&xmlParserVersion); + else + return (&xmlGetGlobalState()->xmlParserVersion); +} + +#undef xmlPedanticParserDefaultValue +int * +__xmlPedanticParserDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlPedanticParserDefaultValue); + else + return (&xmlGetGlobalState()->xmlPedanticParserDefaultValue); +} + +#undef xmlSaveNoEmptyTags +int * +__xmlSaveNoEmptyTags(void) { + if (IS_MAIN_THREAD) + return (&xmlSaveNoEmptyTags); + else + return (&xmlGetGlobalState()->xmlSaveNoEmptyTags); +} + +#undef xmlSubstituteEntitiesDefaultValue +int * +__xmlSubstituteEntitiesDefaultValue(void) { + if (IS_MAIN_THREAD) + return (&xmlSubstituteEntitiesDefaultValue); + else + return (&xmlGetGlobalState()->xmlSubstituteEntitiesDefaultValue); +} diff --git a/bundle/libxml/hash.c b/bundle/libxml/hash.c new file mode 100644 index 0000000000..2a4d000872 --- /dev/null +++ b/bundle/libxml/hash.c @@ -0,0 +1,822 @@ +/* + * hash.c: chained hash tables + * + * Reference: Your favorite introductory book on algorithms + * + * Copyright (C) 2000 Bjorn Reese and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: breese@users.sourceforge.net + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> +#include <libxml/hash.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libxml/xmlerror.h> +#include <libxml/globals.h> + +#define MAX_HASH_LEN 8 + +/* #define DEBUG_GROW */ + +/* + * A single entry in the hash table + */ +typedef struct _xmlHashEntry xmlHashEntry; +typedef xmlHashEntry *xmlHashEntryPtr; +struct _xmlHashEntry { + struct _xmlHashEntry *next; + xmlChar *name; + xmlChar *name2; + xmlChar *name3; + void *payload; + int valid; +}; + +/* + * The entire hash table + */ +struct _xmlHashTable { + struct _xmlHashEntry *table; + int size; + int nbElems; +}; + +/* + * xmlHashComputeKey: + * Calculate the hash key + */ +static unsigned long +xmlHashComputeKey(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3) { + unsigned long value = 0L; + char ch; + + if (name != NULL) { + value += 30 * (*name); + while ((ch = *name++) != 0) { + value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch); + } + } + if (name2 != NULL) { + while ((ch = *name2++) != 0) { + value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch); + } + } + if (name3 != NULL) { + while ((ch = *name3++) != 0) { + value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch); + } + } + return (value % table->size); +} + +/** + * xmlHashCreate: + * @size: the size of the hash table + * + * Create a new xmlHashTablePtr. + * + * Returns the newly created object, or NULL if an error occured. 
+ */ +xmlHashTablePtr +xmlHashCreate(int size) { + xmlHashTablePtr table; + + if (size <= 0) + size = 256; + + table = xmlMalloc(sizeof(xmlHashTable)); + if (table) { + table->size = size; + table->nbElems = 0; + table->table = xmlMalloc(size * sizeof(xmlHashEntry)); + if (table->table) { + memset(table->table, 0, size * sizeof(xmlHashEntry)); + return(table); + } + xmlFree(table); + } + return(NULL); +} + +/** + * xmlHashGrow: + * @table: the hash table + * @size: the new size of the hash table + * + * resize the hash table + * + * Returns 0 in case of success, -1 in case of failure + */ +static int +xmlHashGrow(xmlHashTablePtr table, int size) { + unsigned long key; + int oldsize, i; + xmlHashEntryPtr iter, next; + struct _xmlHashEntry *oldtable; +#ifdef DEBUG_GROW + unsigned long nbElem = 0; +#endif + + if (table == NULL) + return(-1); + if (size < 8) + return(-1); + if (size > 8 * 2048) + return(-1); + + oldsize = table->size; + oldtable = table->table; + if (oldtable == NULL) + return(-1); + + table->table = xmlMalloc(size * sizeof(xmlHashEntry)); + if (table->table == NULL) { + table->table = oldtable; + return(-1); + } + memset(table->table, 0, size * sizeof(xmlHashEntry)); + table->size = size; + + /* If the two loops are merged, there would be situations where + a new entry needs to allocated and data copied into it from + the main table. 
So instead, we run through the array twice, first + copying all the elements in the main array (where we can't get + conflicts) and then the rest, so we only free (and don't allocate) + */ + for (i = 0; i < oldsize; i++) { + if (oldtable[i].valid == 0) + continue; + key = xmlHashComputeKey(table, oldtable[i].name, oldtable[i].name2, + oldtable[i].name3); + memcpy(&(table->table[key]), &(oldtable[i]), sizeof(xmlHashEntry)); + table->table[key].next = NULL; + } + + for (i = 0; i < oldsize; i++) { + iter = oldtable[i].next; + while (iter) { + next = iter->next; + + /* + * put back the entry in the new table + */ + + key = xmlHashComputeKey(table, iter->name, iter->name2, + iter->name3); + if (table->table[key].valid == 0) { + memcpy(&(table->table[key]), iter, sizeof(xmlHashEntry)); + table->table[key].next = NULL; + xmlFree(iter); + } else { + iter->next = table->table[key].next; + table->table[key].next = iter; + } + +#ifdef DEBUG_GROW + nbElem++; +#endif + + iter = next; + } + } + + xmlFree(oldtable); + +#ifdef DEBUG_GROW + xmlGenericError(xmlGenericErrorContext, + "xmlHashGrow : from %d to %d, %d elems\n", oldsize, size, nbElem); +#endif + + return(0); +} + +/** + * xmlHashFree: + * @table: the hash table + * @f: the deallocator function for items in the hash + * + * Free the hash @table and its contents. The userdata is + * deallocated with @f if provided. 
+ */ +void +xmlHashFree(xmlHashTablePtr table, xmlHashDeallocator f) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + int inside_table = 0; + + if (table == NULL) + return; + if (table->table) { + for(i = 0; i < table->size; i++) { + iter = &(table->table[i]); + if (iter->valid == 0) + continue; + inside_table = 1; + while (iter) { + next = iter->next; + if (f) + f(iter->payload, iter->name); + if (iter->name) + xmlFree(iter->name); + if (iter->name2) + xmlFree(iter->name2); + if (iter->name3) + xmlFree(iter->name3); + iter->payload = NULL; + if (!inside_table) + xmlFree(iter); + inside_table = 0; + iter = next; + } + inside_table = 0; + } + xmlFree(table->table); + } + xmlFree(table); +} + +/** + * xmlHashAddEntry: + * @table: the hash table + * @name: the name of the userdata + * @userdata: a pointer to the userdata + * + * Add the @userdata to the hash @table. This can later be retrieved + * by using the @name. Duplicate names generate errors. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashAddEntry(xmlHashTablePtr table, const xmlChar *name, void *userdata) { + return(xmlHashAddEntry3(table, name, NULL, NULL, userdata)); +} + +/** + * xmlHashAddEntry2: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @userdata: a pointer to the userdata + * + * Add the @userdata to the hash @table. This can later be retrieved + * by using the (@name, @name2) tuple. Duplicate tuples generate errors. + * + * Returns 0 the addition succeeded and -1 in case of error. 
+ */ +int +xmlHashAddEntry2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, void *userdata) { + return(xmlHashAddEntry3(table, name, name2, NULL, userdata)); +} + +/** + * xmlHashUpdateEntry: + * @table: the hash table + * @name: the name of the userdata + * @userdata: a pointer to the userdata + * @f: the deallocator function for replaced item (if any) + * + * Add the @userdata to the hash @table. This can later be retrieved + * by using the @name. Existing entry for this @name will be removed + * and freed with @f if found. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashUpdateEntry(xmlHashTablePtr table, const xmlChar *name, + void *userdata, xmlHashDeallocator f) { + return(xmlHashUpdateEntry3(table, name, NULL, NULL, userdata, f)); +} + +/** + * xmlHashUpdateEntry2: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @userdata: a pointer to the userdata + * @f: the deallocator function for replaced item (if any) + * + * Add the @userdata to the hash @table. This can later be retrieved + * by using the (@name, @name2) tuple. Existing entry for this tuple will + * be removed and freed with @f if found. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashUpdateEntry2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, void *userdata, + xmlHashDeallocator f) { + return(xmlHashUpdateEntry3(table, name, name2, NULL, userdata, f)); +} + +/** + * xmlHashLookup: + * @table: the hash table + * @name: the name of the userdata + * + * Find the userdata specified by the @name. 
+ * + * Returns the pointer to the userdata + */ +void * +xmlHashLookup(xmlHashTablePtr table, const xmlChar *name) { + return(xmlHashLookup3(table, name, NULL, NULL)); +} + +/** + * xmlHashLookup2: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * + * Find the userdata specified by the (@name, @name2) tuple. + * + * Returns the pointer to the userdata + */ +void * +xmlHashLookup2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2) { + return(xmlHashLookup3(table, name, name2, NULL)); +} + +/** + * xmlHashAddEntry3: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * @userdata: a pointer to the userdata + * + * Add the @userdata to the hash @table. This can later be retrieved + * by using the tuple (@name, @name2, @name3). Duplicate entries generate + * errors. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashAddEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + void *userdata) { + unsigned long key, len = 0; + xmlHashEntryPtr entry; + xmlHashEntryPtr insert; + + if ((table == NULL) || name == NULL) + return(-1); + + /* + * Check for duplicate and insertion location. 
+ */ + key = xmlHashComputeKey(table, name, name2, name3); + if (table->table[key].valid == 0) { + insert = NULL; + } else { + for (insert = &(table->table[key]); insert->next != NULL; + insert = insert->next) { + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) + return(-1); + len++; + } + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) + return(-1); + } + + if (insert == NULL) { + entry = &(table->table[key]); + } else { + entry = xmlMalloc(sizeof(xmlHashEntry)); + if (entry == NULL) + return(-1); + } + + entry->name = xmlStrdup(name); + entry->name2 = xmlStrdup(name2); + entry->name3 = xmlStrdup(name3); + entry->payload = userdata; + entry->next = NULL; + entry->valid = 1; + + + if (insert != NULL) + insert->next = entry; + + table->nbElems++; + + if (len > MAX_HASH_LEN) + xmlHashGrow(table, MAX_HASH_LEN * table->size); + + return(0); +} + +/** + * xmlHashUpdateEntry3: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * @userdata: a pointer to the userdata + * @f: the deallocator function for replaced item (if any) + * + * Add the @userdata to the hash @table. This can later be retrieved + * by using the tuple (@name, @name2, @name3). Existing entry for this tuple + * will be removed and freed with @f if found. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashUpdateEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + void *userdata, xmlHashDeallocator f) { + unsigned long key; + xmlHashEntryPtr entry; + xmlHashEntryPtr insert; + + if ((table == NULL) || name == NULL) + return(-1); + + /* + * Check for duplicate and insertion location. 
+ */ + key = xmlHashComputeKey(table, name, name2, name3); + if (table->table[key].valid == 0) { + insert = NULL; + } else { + for (insert = &(table->table[key]); insert->next != NULL; + insert = insert->next) { + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) { + if (f) + f(insert->payload, insert->name); + insert->payload = userdata; + return(0); + } + } + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) { + if (f) + f(insert->payload, insert->name); + insert->payload = userdata; + return(0); + } + } + + if (insert == NULL) { + entry = &(table->table[key]); + } else { + entry = xmlMalloc(sizeof(xmlHashEntry)); + if (entry == NULL) + return(-1); + } + + entry->name = xmlStrdup(name); + entry->name2 = xmlStrdup(name2); + entry->name3 = xmlStrdup(name3); + entry->payload = userdata; + entry->next = NULL; + entry->valid = 1; + table->nbElems++; + + + if (insert != NULL) { + insert->next = entry; + } + return(0); +} + +/** + * xmlHashLookup3: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * + * Find the userdata specified by the (@name, @name2, @name3) tuple. 
+ * + * Returns the a pointer to the userdata + */ +void * +xmlHashLookup3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3) { + unsigned long key; + xmlHashEntryPtr entry; + + if (table == NULL) + return(NULL); + if (name == NULL) + return(NULL); + key = xmlHashComputeKey(table, name, name2, name3); + if (table->table[key].valid == 0) + return(NULL); + for (entry = &(table->table[key]); entry != NULL; entry = entry->next) { + if ((xmlStrEqual(entry->name, name)) && + (xmlStrEqual(entry->name2, name2)) && + (xmlStrEqual(entry->name3, name3))) + return(entry->payload); + } + return(NULL); +} + +typedef struct { + xmlHashScanner hashscanner; + void *data; +} stubData; + +static void +stubHashScannerFull (void *payload, void *data, const xmlChar *name, + const xmlChar *name2 ATTRIBUTE_UNUSED, + const xmlChar *name3 ATTRIBUTE_UNUSED) { + stubData *stubdata = (stubData *) data; + stubdata->hashscanner (payload, stubdata->data, (xmlChar *) name); +} + +/** + * xmlHashScan: + * @table: the hash table + * @f: the scanner function for items in the hash + * @data: extra data passed to f + * + * Scan the hash @table and applied @f to each value. + */ +void +xmlHashScan(xmlHashTablePtr table, xmlHashScanner f, void *data) { + stubData stubdata; + stubdata.data = data; + stubdata.hashscanner = f; + xmlHashScanFull (table, stubHashScannerFull, &stubdata); +} + +/** + * xmlHashScanFull: + * @table: the hash table + * @f: the scanner function for items in the hash + * @data: extra data passed to f + * + * Scan the hash @table and applied @f to each value. 
+ */ +void +xmlHashScanFull(xmlHashTablePtr table, xmlHashScannerFull f, void *data) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + + if (table == NULL) + return; + if (f == NULL) + return; + + if (table->table) { + for(i = 0; i < table->size; i++) { + if (table->table[i].valid == 0) + continue; + iter = &(table->table[i]); + while (iter) { + next = iter->next; + if (f) + f(iter->payload, data, iter->name, + iter->name2, iter->name3); + iter = next; + } + } + } +} + +/** + * xmlHashScan3: + * @table: the hash table + * @name: the name of the userdata or NULL + * @name2: a second name of the userdata or NULL + * @name3: a third name of the userdata or NULL + * @f: the scanner function for items in the hash + * @data: extra data passed to f + * + * Scan the hash @table and applied @f to each value matching + * (@name, @name2, @name3) tuple. If one of the names is null, + * the comparison is considered to match. + */ +void +xmlHashScan3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashScanner f, void *data) { + xmlHashScanFull3 (table, name, name2, name3, + (xmlHashScannerFull) f, data); +} + +/** + * xmlHashScanFull3: + * @table: the hash table + * @name: the name of the userdata or NULL + * @name2: a second name of the userdata or NULL + * @name3: a third name of the userdata or NULL + * @f: the scanner function for items in the hash + * @data: extra data passed to f + * + * Scan the hash @table and applied @f to each value matching + * (@name, @name2, @name3) tuple. If one of the names is null, + * the comparison is considered to match. 
+ */ +void +xmlHashScanFull3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashScannerFull f, void *data) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + + if (table == NULL) + return; + if (f == NULL) + return; + + if (table->table) { + for(i = 0; i < table->size; i++) { + if (table->table[i].valid == 0) + continue; + iter = &(table->table[i]); + while (iter) { + next = iter->next; + if (((name == NULL) || (xmlStrEqual(name, iter->name))) && + ((name2 == NULL) || (xmlStrEqual(name2, iter->name2))) && + ((name3 == NULL) || (xmlStrEqual(name3, iter->name3)))) { + f(iter->payload, data, iter->name, + iter->name2, iter->name3); + } + iter = next; + } + } + } +} + +/** + * xmlHashCopy: + * @table: the hash table + * @f: the copier function for items in the hash + * + * Scan the hash @table and applied @f to each value. + * + * Returns the new table or NULL in case of error. + */ +xmlHashTablePtr +xmlHashCopy(xmlHashTablePtr table, xmlHashCopier f) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + xmlHashTablePtr ret; + + if (table == NULL) + return(NULL); + if (f == NULL) + return(NULL); + + ret = xmlHashCreate(table->size); + if (table->table) { + for(i = 0; i < table->size; i++) { + if (table->table[i].valid == 0) + continue; + iter = &(table->table[i]); + while (iter) { + next = iter->next; + xmlHashAddEntry3(ret, iter->name, iter->name2, + iter->name3, f(iter->payload, iter->name)); + iter = next; + } + } + } + ret->nbElems = table->nbElems; + return(ret); +} + +/** + * xmlHashSize: + * @table: the hash table + * + * Query the number of elements installed in the hash @table. 
+ * + * Returns the number of elements in the hash table or + * -1 in case of error + */ +int +xmlHashSize(xmlHashTablePtr table) { + if (table == NULL) + return(-1); + return(table->nbElems); +} + +/** + * xmlHashRemoveEntry: + * @table: the hash table + * @name: the name of the userdata + * @f: the deallocator function for removed item (if any) + * + * Find the userdata specified by the @name and remove + * it from the hash @table. Existing userdata for this tuple will be removed + * and freed with @f. + * + * Returns 0 if the removal succeeded and -1 in case of error or not found. + */ +int xmlHashRemoveEntry(xmlHashTablePtr table, const xmlChar *name, + xmlHashDeallocator f) { + return(xmlHashRemoveEntry3(table, name, NULL, NULL, f)); +} + +/** + * xmlHashRemoveEntry2: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @f: the deallocator function for removed item (if any) + * + * Find the userdata specified by the (@name, @name2) tuple and remove + * it from the hash @table. Existing userdata for this tuple will be removed + * and freed with @f. + * + * Returns 0 if the removal succeeded and -1 in case of error or not found. + */ +int +xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, xmlHashDeallocator f) { + return(xmlHashRemoveEntry3(table, name, name2, NULL, f)); +} + +/** + * xmlHashRemoveEntry3: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * @f: the deallocator function for removed item (if any) + * + * Find the userdata specified by the (@name, @name2, @name3) tuple and remove + * it from the hash @table. Existing userdata for this tuple will be removed + * and freed with @f. + * + * Returns 0 if the removal succeeded and -1 in case of error or not found. 
+ */ +int +xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, xmlHashDeallocator f) { + unsigned long key; + xmlHashEntryPtr entry; + xmlHashEntryPtr prev = NULL; + + if (table == NULL || name == NULL) + return(-1); + + key = xmlHashComputeKey(table, name, name2, name3); + if (table->table[key].valid == 0) { + return(-1); + } else { + for (entry = &(table->table[key]); entry != NULL; entry = entry->next) { + if (xmlStrEqual(entry->name, name) && + xmlStrEqual(entry->name2, name2) && + xmlStrEqual(entry->name3, name3)) { + if(f) + f(entry->payload, entry->name); + entry->payload = NULL; + if(entry->name) + xmlFree(entry->name); + if(entry->name2) + xmlFree(entry->name2); + if(entry->name3) + xmlFree(entry->name3); + if(prev) { + prev->next = entry->next; + xmlFree(entry); + } else { + if (entry->next == NULL) { + entry->valid = 0; + } else { + entry = entry->next; + memcpy(&(table->table[key]), entry, sizeof(xmlHashEntry)); + xmlFree(entry); + } + } + table->nbElems--; + return(0); + } + prev = entry; + } + return(-1); + } +} + diff --git a/bundle/libxml/include/libxml/DOCBparser.h b/bundle/libxml/include/libxml/DOCBparser.h new file mode 100644 index 0000000000..4b7230f17d --- /dev/null +++ b/bundle/libxml/include/libxml/DOCBparser.h @@ -0,0 +1,73 @@ +/* + * DOCBparser.h : interface for a DocBook SGML non-verifying parser + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __DOCB_PARSER_H__ +#define __DOCB_PARSER_H__ +#include <libxml/parser.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Most of the back-end structures from XML and SGML are shared. 
+ */ +typedef xmlParserCtxt docbParserCtxt; +typedef xmlParserCtxtPtr docbParserCtxtPtr; +typedef xmlParserNodeInfo docbParserNodeInfo; +typedef xmlSAXHandler docbSAXHandler; +typedef xmlSAXHandlerPtr docbSAXHandlerPtr; +typedef xmlParserInput docbParserInput; +typedef xmlParserInputPtr docbParserInputPtr; +typedef xmlDocPtr docbDocPtr; +typedef xmlNodePtr docbNodePtr; + +/* + * There are only a few public functions. + */ +int docbEncodeEntities(unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen, int quoteChar); + +docbDocPtr docbSAXParseDoc (xmlChar *cur, + const char *encoding, + docbSAXHandlerPtr sax, + void *userData); +docbDocPtr docbParseDoc (xmlChar *cur, + const char *encoding); +docbDocPtr docbSAXParseFile(const char *filename, + const char *encoding, + docbSAXHandlerPtr sax, + void *userData); +docbDocPtr docbParseFile (const char *filename, + const char *encoding); + +/** + * Interfaces for the Push mode. + */ +void docbFreeParserCtxt (docbParserCtxtPtr ctxt); +docbParserCtxtPtr docbCreatePushParserCtxt(docbSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename, + xmlCharEncoding enc); +int docbParseChunk (docbParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); +docbParserCtxtPtr docbCreateFileParserCtxt(const char *filename, + const char *encoding); +int docbParseDocument (docbParserCtxtPtr ctxt); + +#ifdef __cplusplus +} +#endif + +#endif /* __DOCB_PARSER_H__ */ diff --git a/bundle/libxml/include/libxml/HTMLparser.h b/bundle/libxml/include/libxml/HTMLparser.h new file mode 100644 index 0000000000..f09213711e --- /dev/null +++ b/bundle/libxml/include/libxml/HTMLparser.h @@ -0,0 +1,117 @@ +/* + * HTMLparser.h : interface for an HTML 4.0 non-verifying parser + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#ifndef __HTML_PARSER_H__ +#define __HTML_PARSER_H__ +#include <libxml/parser.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Most of the back-end structures from XML and HTML are shared. + */ +typedef xmlParserCtxt htmlParserCtxt; +typedef xmlParserCtxtPtr htmlParserCtxtPtr; +typedef xmlParserNodeInfo htmlParserNodeInfo; +typedef xmlSAXHandler htmlSAXHandler; +typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; +typedef xmlParserInput htmlParserInput; +typedef xmlParserInputPtr htmlParserInputPtr; +typedef xmlDocPtr htmlDocPtr; +typedef xmlNodePtr htmlNodePtr; + +/* + * Internal description of an HTML element. + */ +typedef struct _htmlElemDesc htmlElemDesc; +typedef htmlElemDesc *htmlElemDescPtr; +struct _htmlElemDesc { + const char *name; /* The tag name */ + char startTag; /* Whether the start tag can be implied */ + char endTag; /* Whether the end tag can be implied */ + char saveEndTag; /* Whether the end tag should be saved */ + char empty; /* Is this an empty element? */ + char depr; /* Is this a deprecated element? */ + char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ + char isinline; /* Is this a block (0) or inline (1) element? */ + const char *desc; /* the description */ +}; + +/* + * Internal description of an HTML entity. + */ +typedef struct _htmlEntityDesc htmlEntityDesc; +typedef htmlEntityDesc *htmlEntityDescPtr; +struct _htmlEntityDesc { + unsigned int value; /* the UNICODE value for the character */ + const char *name; /* The entity name */ + const char *desc; /* the description */ +}; + +/* + * There are only a few public functions. 
+ */ +const htmlElemDesc * htmlTagLookup (const xmlChar *tag); +const htmlEntityDesc * htmlEntityLookup(const xmlChar *name); +const htmlEntityDesc * htmlEntityValueLookup(unsigned int value); + +int htmlIsAutoClosed(htmlDocPtr doc, + htmlNodePtr elem); +int htmlAutoCloseTag(htmlDocPtr doc, + const xmlChar *name, + htmlNodePtr elem); +const htmlEntityDesc * htmlParseEntityRef(htmlParserCtxtPtr ctxt, + xmlChar **str); +int htmlParseCharRef(htmlParserCtxtPtr ctxt); +void htmlParseElement(htmlParserCtxtPtr ctxt); + +int htmlParseDocument(htmlParserCtxtPtr ctxt); +htmlDocPtr htmlSAXParseDoc (xmlChar *cur, + const char *encoding, + htmlSAXHandlerPtr sax, + void *userData); +htmlDocPtr htmlParseDoc (xmlChar *cur, + const char *encoding); +htmlDocPtr htmlSAXParseFile(const char *filename, + const char *encoding, + htmlSAXHandlerPtr sax, + void *userData); +htmlDocPtr htmlParseFile (const char *filename, + const char *encoding); +int UTF8ToHtml (unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen); +int htmlEncodeEntities(unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen, int quoteChar); +int htmlIsScriptAttribute(const xmlChar *name); +int htmlHandleOmittedElem(int val); + +/** + * Interfaces for the Push mode. + */ +void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); +htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename, + xmlCharEncoding enc); +int htmlParseChunk (htmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); +#ifdef __cplusplus +} +#endif + +#endif /* __HTML_PARSER_H__ */ diff --git a/bundle/libxml/include/libxml/HTMLtree.h b/bundle/libxml/include/libxml/HTMLtree.h new file mode 100644 index 0000000000..3a441c4b5c --- /dev/null +++ b/bundle/libxml/include/libxml/HTMLtree.h @@ -0,0 +1,117 @@ +/* + * HTMLtree.h : describes the structures found in a tree resulting + * from an XML parsing. 
+ * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __HTML_TREE_H__ +#define __HTML_TREE_H__ + +#include <stdio.h> +#include <libxml/tree.h> +#include <libxml/HTMLparser.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * HTML_TEXT_NODE: + * + * Macro. A text node in an HTML document is really implemented + * the same way as a text node in an XML document. + */ +#define HTML_TEXT_NODE XML_TEXT_NODE +/** + * HTML_ENTITY_REF_NODE: + * + * Macro. An entity reference in an HTML document is really implemented + * the same way as an entity reference in an XML document. + */ +#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE +/** + * HTML_COMMENT_NODE: + * + * Macro. A comment in an HTML document is really implemented + * the same way as a comment in an XML document. + */ +#define HTML_COMMENT_NODE XML_COMMENT_NODE +/** + * HTML_PRESERVE_NODE: + * + * Macro. A preserved node in an HTML document is really implemented + * the same way as a CDATA section in an XML document. + */ +#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE +/** + * HTML_PI_NODE: + * + * Macro. A processing instruction in an HTML document is really implemented + * the same way as a processing instruction in an XML document. 
+ */ +#define HTML_PI_NODE XML_PI_NODE + +htmlDocPtr htmlNewDoc (const xmlChar *URI, + const xmlChar *ExternalID); +htmlDocPtr htmlNewDocNoDtD (const xmlChar *URI, + const xmlChar *ExternalID); +const xmlChar * htmlGetMetaEncoding (htmlDocPtr doc); +int htmlSetMetaEncoding (htmlDocPtr doc, + const xmlChar *encoding); +void htmlDocDumpMemory (xmlDocPtr cur, + xmlChar **mem, + int *size); +int htmlDocDump (FILE *f, + xmlDocPtr cur); +int htmlSaveFile (const char *filename, + xmlDocPtr cur); +int htmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur); +void htmlNodeDumpFile (FILE *out, + xmlDocPtr doc, + xmlNodePtr cur); +int htmlNodeDumpFileFormat (FILE *out, + xmlDocPtr doc, + xmlNodePtr cur, + const char *encoding, + int format); +int htmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); +int htmlSaveFileFormat (const char *filename, + xmlDocPtr cur, + const char *encoding, + int format); + +void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + const char *encoding, + int format); +void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); +void htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding, + int format); + +int htmlIsBooleanAttr (const xmlChar *name); +void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding); + + + + +#ifdef __cplusplus +} +#endif + +#endif /* __HTML_TREE_H__ */ + diff --git a/bundle/libxml/include/libxml/Makefile.am b/bundle/libxml/include/libxml/Makefile.am new file mode 100644 index 0000000000..3a50bc1fcb --- /dev/null +++ b/bundle/libxml/include/libxml/Makefile.am @@ -0,0 +1,46 @@ +## Process this file with automake to produce Makefile.in + +xmlincdir = $(includedir)/libxml2/libxml + +xmlinc_HEADERS = \ + SAX.h \ + entities.h \ + encoding.h \ + parser.h \ + parserInternals.h \ + xmlerror.h \ + HTMLparser.h \ + HTMLtree.h \ + debugXML.h \ 
+ tree.h \ + list.h \ + hash.h \ + xpath.h \ + xpathInternals.h \ + xpointer.h \ + xinclude.h \ + xmlIO.h \ + xmlmemory.h \ + nanohttp.h \ + nanoftp.h \ + uri.h \ + valid.h \ + xlink.h \ + xmlversion.h \ + DOCBparser.h \ + catalog.h \ + threads.h \ + globals.h \ + c14n.h \ + xmlautomata.h \ + xmlregexp.h \ + xmlschemas.h \ + schemasInternals.h \ + xmlschemastypes.h \ + xmlunicode.h \ + xmlreader.h + +install-exec-hook: + $(mkinstalldirs) $(DESTDIR)$(xmlincdir) + +EXTRA_DIST = xmlversion.h.in diff --git a/bundle/libxml/include/libxml/Makefile.in b/bundle/libxml/include/libxml/Makefile.in new file mode 100644 index 0000000000..af1022c53a --- /dev/null +++ b/bundle/libxml/include/libxml/Makefile.in @@ -0,0 +1,335 @@ +# Makefile.in generated automatically by automake 1.4-p6 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = ../.. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ +AS = @AS@ +C14N_OBJ = @C14N_OBJ@ +CATALOG_OBJ = @CATALOG_OBJ@ +CC = @CC@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +DEBUG_OBJ = @DEBUG_OBJ@ +DLLTOOL = @DLLTOOL@ +DOCB_OBJ = @DOCB_OBJ@ +ECHO = @ECHO@ +EXEEXT = @EXEEXT@ +FTP_OBJ = @FTP_OBJ@ +HAVE_ISINF = @HAVE_ISINF@ +HAVE_ISNAN = @HAVE_ISNAN@ +HTML_DIR = @HTML_DIR@ +HTML_OBJ = @HTML_OBJ@ +HTTP_OBJ = @HTTP_OBJ@ +ICONV_LIBS = @ICONV_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBXML_MAJOR_VERSION = @LIBXML_MAJOR_VERSION@ +LIBXML_MICRO_VERSION = @LIBXML_MICRO_VERSION@ +LIBXML_MINOR_VERSION = @LIBXML_MINOR_VERSION@ +LIBXML_VERSION = @LIBXML_VERSION@ +LIBXML_VERSION_INFO = @LIBXML_VERSION_INFO@ +LIBXML_VERSION_NUMBER = @LIBXML_VERSION_NUMBER@ +LN_S = @LN_S@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MV = @MV@ +M_LIBS = @M_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PYTHON = @PYTHON@ +PYTHON_INCLUDES = @PYTHON_INCLUDES@ +PYTHON_SITE_PACKAGES = @PYTHON_SITE_PACKAGES@ +PYTHON_SUBDIR = @PYTHON_SUBDIR@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +RDL_LIBS = @RDL_LIBS@ +RELDATE = @RELDATE@ +RM = @RM@ +STRIP = @STRIP@ +TAR = @TAR@ +TEST_REGEXPS = @TEST_REGEXPS@ +TEST_SCHEMAS = @TEST_SCHEMAS@ +TEST_THREADS = @TEST_THREADS@ +THREAD_CFLAGS = @THREAD_CFLAGS@ +THREAD_LIBS = @THREAD_LIBS@ +U = @U@ +VERSION = @VERSION@ +WITH_C14N = @WITH_C14N@ +WITH_CATALOG = @WITH_CATALOG@ +WITH_DEBUG = @WITH_DEBUG@ +WITH_DOCB = @WITH_DOCB@ +WITH_FTP = @WITH_FTP@ +WITH_HTML = @WITH_HTML@ +WITH_HTTP = @WITH_HTTP@ +WITH_ICONV = @WITH_ICONV@ +WITH_MEM_DEBUG = @WITH_MEM_DEBUG@ +WITH_REGEXPS = 
@WITH_REGEXPS@ +WITH_SCHEMAS = @WITH_SCHEMAS@ +WITH_THREADS = @WITH_THREADS@ +WITH_TRIO = @WITH_TRIO@ +WITH_XINCLUDE = @WITH_XINCLUDE@ +WITH_XPATH = @WITH_XPATH@ +WITH_XPTR = @WITH_XPTR@ +XINCLUDE_OBJ = @XINCLUDE_OBJ@ +XML_CFLAGS = @XML_CFLAGS@ +XML_INCLUDEDIR = @XML_INCLUDEDIR@ +XML_LIBDIR = @XML_LIBDIR@ +XML_LIBS = @XML_LIBS@ +XPATH_OBJ = @XPATH_OBJ@ +XPTR_OBJ = @XPTR_OBJ@ +Z_CFLAGS = @Z_CFLAGS@ +Z_LIBS = @Z_LIBS@ + +xmlincdir = $(includedir)/libxml2/libxml + +xmlinc_HEADERS = \ + SAX.h \ + entities.h \ + encoding.h \ + parser.h \ + parserInternals.h \ + xmlerror.h \ + HTMLparser.h \ + HTMLtree.h \ + debugXML.h \ + tree.h \ + list.h \ + hash.h \ + xpath.h \ + xpathInternals.h \ + xpointer.h \ + xinclude.h \ + xmlIO.h \ + xmlmemory.h \ + nanohttp.h \ + nanoftp.h \ + uri.h \ + valid.h \ + xlink.h \ + xmlversion.h \ + DOCBparser.h \ + catalog.h \ + threads.h \ + globals.h \ + c14n.h \ + xmlautomata.h \ + xmlregexp.h \ + xmlschemas.h \ + schemasInternals.h \ + xmlschemastypes.h \ + xmlunicode.h \ + xmlreader.h + + +EXTRA_DIST = xmlversion.h.in +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../../config.h +CONFIG_CLEAN_FILES = xmlversion.h +HEADERS = $(xmlinc_HEADERS) + +DIST_COMMON = Makefile.am Makefile.in xmlversion.h.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +GZIP_ENV = --best +all: all-redirect +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu --include-deps include/libxml/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +xmlversion.h: $(top_builddir)/config.status xmlversion.h.in + cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +install-xmlincHEADERS: $(xmlinc_HEADERS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(xmlincdir) + 
@list='$(xmlinc_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d= ; else d="$(srcdir)/"; fi; \ + echo " $(INSTALL_DATA) $$d$$p $(DESTDIR)$(xmlincdir)/$$p"; \ + $(INSTALL_DATA) $$d$$p $(DESTDIR)$(xmlincdir)/$$p; \ + done + +uninstall-xmlincHEADERS: + @$(NORMAL_UNINSTALL) + list='$(xmlinc_HEADERS)'; for p in $$list; do \ + rm -f $(DESTDIR)$(xmlincdir)/$$p; \ + done + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + here=`pwd` && cd $(srcdir) \ + && mkid -f$$here/ID $$unique $(LISP) + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = include/libxml + +distdir: $(DISTFILES) + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: all-am +check: check-am +installcheck-am: +installcheck: installcheck-am +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-exec: install-exec-am + +install-data-am: install-xmlincHEADERS +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-am +uninstall-am: uninstall-xmlincHEADERS +uninstall: uninstall-am 
+all-am: Makefile $(HEADERS) +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + $(mkinstalldirs) $(DESTDIR)$(xmlincdir) + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-tags mostlyclean-generic + +mostlyclean: mostlyclean-am + +clean-am: clean-tags clean-generic mostlyclean-am + +clean: clean-am + +distclean-am: distclean-tags distclean-generic clean-am + -rm -f libtool + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-tags maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." + +maintainer-clean: maintainer-clean-am + +.PHONY: uninstall-xmlincHEADERS install-xmlincHEADERS tags \ +mostlyclean-tags distclean-tags clean-tags maintainer-clean-tags \ +distdir info-am info dvi-am dvi check check-am installcheck-am \ +installcheck install-exec-am install-exec install-data-am install-data \ +install-am install uninstall-am uninstall all-redirect all-am all \ +installdirs mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + + +install-exec-hook: + $(mkinstalldirs) $(DESTDIR)$(xmlincdir) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/bundle/libxml/include/libxml/SAX.h b/bundle/libxml/include/libxml/SAX.h new file mode 100644 index 0000000000..d96d9e9596 --- /dev/null +++ b/bundle/libxml/include/libxml/SAX.h @@ -0,0 +1,128 @@ +/* + * SAX.h : Default SAX handler interfaces. + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard <daniel@veillard.com> + */ + + +#ifndef __XML_SAX_H__ +#define __XML_SAX_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <libxml/parser.h> +#include <libxml/xlink.h> + +#ifdef __cplusplus +extern "C" { +#endif +const xmlChar * getPublicId (void *ctx); +const xmlChar * getSystemId (void *ctx); +void setDocumentLocator (void *ctx, + xmlSAXLocatorPtr loc); + +int getLineNumber (void *ctx); +int getColumnNumber (void *ctx); + +int isStandalone (void *ctx); +int hasInternalSubset (void *ctx); +int hasExternalSubset (void *ctx); + +void internalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +void externalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlEntityPtr getEntity (void *ctx, + const xmlChar *name); +xmlEntityPtr getParameterEntity (void *ctx, + const xmlChar *name); +xmlParserInputPtr resolveEntity (void *ctx, + const xmlChar *publicId, + const xmlChar *systemId); + +void entityDecl (void *ctx, + const xmlChar *name, + int type, + const xmlChar *publicId, + const xmlChar *systemId, + xmlChar *content); +void attributeDecl (void *ctx, + const xmlChar *elem, + const xmlChar *fullname, + int type, + int def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +void elementDecl (void *ctx, + const xmlChar *name, + int type, + xmlElementContentPtr content); +void notationDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId); +void unparsedEntityDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName); + +void startDocument (void *ctx); +void endDocument (void *ctx); +void attribute (void *ctx, + const xmlChar *fullname, + const xmlChar *value); +void startElement (void *ctx, + const xmlChar *fullname, + const xmlChar **atts); +void endElement (void *ctx, + const xmlChar *name); +void reference (void *ctx, + const 
xmlChar *name); +void characters (void *ctx, + const xmlChar *ch, + int len); +void ignorableWhitespace (void *ctx, + const xmlChar *ch, + int len); +void processingInstruction (void *ctx, + const xmlChar *target, + const xmlChar *data); +void globalNamespace (void *ctx, + const xmlChar *href, + const xmlChar *prefix); +void setNamespace (void *ctx, + const xmlChar *name); +xmlNsPtr getNamespace (void *ctx); +int checkNamespace (void *ctx, + xmlChar *nameSpace); +void namespaceDecl (void *ctx, + const xmlChar *href, + const xmlChar *prefix); +void comment (void *ctx, + const xmlChar *value); +void cdataBlock (void *ctx, + const xmlChar *value, + int len); + +void initxmlDefaultSAXHandler (xmlSAXHandler *hdlr, + int warning); +#ifdef LIBXML_HTML_ENABLED +void inithtmlDefaultSAXHandler (xmlSAXHandler *hdlr); +#endif +#ifdef LIBXML_DOCB_ENABLED +void initdocbDefaultSAXHandler (xmlSAXHandler *hdlr); +#endif +void xmlDefaultSAXHandlerInit (void); +void htmlDefaultSAXHandlerInit (void); +void docbDefaultSAXHandlerInit (void); +#ifdef __cplusplus +} +#endif +#endif /* __XML_SAX_H__ */ diff --git a/bundle/libxml/include/libxml/c14n.h b/bundle/libxml/include/libxml/c14n.h new file mode 100644 index 0000000000..75ace8a4a4 --- /dev/null +++ b/bundle/libxml/include/libxml/c14n.h @@ -0,0 +1,91 @@ +/* + * "Canonical XML" implementation + * http://www.w3.org/TR/xml-c14n + * + * "Exclusive XML Canonicalization" implementation + * http://www.w3.org/TR/xml-exc-c14n + + * See Copyright for the status of this software. 
+ * + * Author: Aleksey Sanin <aleksey@aleksey.com> + */ +#ifndef __XML_C14N_H__ +#define __XML_C14N_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include <libxml/tree.h> +#include <libxml/xpath.h> + +/* + * XML Canonicalization + * http://www.w3.org/TR/xml-c14n + * + * Exclusive XML Canonicalization + * http://www.w3.org/TR/xml-exc-c14n + * + * Canonical form of an XML document could be created if and only if + * a) default attributes (if any) are added to all nodes + * b) all character and parsed entity references are resolved + * In order to achieve this in libxml2 the document MUST be loaded with + * following global settings: + * + * xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS; + * xmlSubstituteEntitiesDefault(1); + * + * or corresponding parser context setting: + * xmlParserCtxtPtr ctxt; + * + * ... + * ctxt->loadsubset = XML_DETECT_IDS | XML_COMPLETE_ATTRS; + * ctxt->replaceEntities = 1; + * ... + */ + + +int xmlC14NDocSaveTo (xmlDocPtr doc, + xmlNodeSetPtr nodes, + int exclusive, + xmlChar **inclusive_ns_prefixes, + int with_comments, + xmlOutputBufferPtr buf); + +int xmlC14NDocDumpMemory (xmlDocPtr doc, + xmlNodeSetPtr nodes, + int exclusive, + xmlChar **inclusive_ns_prefixes, + int with_comments, + xmlChar **doc_txt_ptr); + +int xmlC14NDocSave (xmlDocPtr doc, + xmlNodeSetPtr nodes, + int exclusive, + xmlChar **inclusive_ns_prefixes, + int with_comments, + const char* filename, + int compression); + + +/** + * This is the core C14N function + */ +typedef int (*xmlC14NIsVisibleCallback) (void* user_data, + xmlNodePtr node, + xmlNodePtr parent); + +int xmlC14NExecute (xmlDocPtr doc, + xmlC14NIsVisibleCallback is_visible_callback, + void* user_data, + int exclusive, + xmlChar **inclusive_ns_prefixes, + int with_comments, + xmlOutputBufferPtr buf); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __XML_C14N_H__ */ + diff --git a/bundle/libxml/include/libxml/catalog.h b/bundle/libxml/include/libxml/catalog.h
new file mode 100644 index 0000000000..037e7e80e0 --- /dev/null +++ b/bundle/libxml/include/libxml/catalog.h @@ -0,0 +1,138 @@ +/** + * catalog.h: interfaces of the Catalog handling system + * + * Reference: SGML Open Technical Resolution TR9401:1997. + * http://www.jclark.com/sp/catalog.htm + * + * XML Catalogs Working Draft 12 Jun 2001 + * http://www.oasis-open.org/committees/entity/spec-2001-06-12.html + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_CATALOG_H__ +#define __XML_CATALOG_H__ + +#include <stdio.h> + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_CATALOG_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XML_CATALOGS_NAMESPACE: + * + * The namespace for the XML Catalogs elements. + */ +#define XML_CATALOGS_NAMESPACE \ + (const xmlChar *) "urn:oasis:names:tc:entity:xmlns:xml:catalog" +/** + * XML_CATALOG_PI: + * + * The specific XML Catalog Processing Instruction name. + */ +#define XML_CATALOG_PI \ + (const xmlChar *) "oasis-xml-catalog" + +/* + * The API is voluntarily limited to general cataloging. + */ +typedef enum { + XML_CATA_PREFER_NONE = 0, + XML_CATA_PREFER_PUBLIC = 1, + XML_CATA_PREFER_SYSTEM +} xmlCatalogPrefer; + +typedef enum { + XML_CATA_ALLOW_NONE = 0, + XML_CATA_ALLOW_GLOBAL = 1, + XML_CATA_ALLOW_DOCUMENT = 2, + XML_CATA_ALLOW_ALL = 3 +} xmlCatalogAllow; + +typedef struct _xmlCatalog xmlCatalog; +typedef xmlCatalog *xmlCatalogPtr; + +/* + * Operations on a given catalog.
+ */ +xmlCatalogPtr xmlNewCatalog (int sgml); +xmlCatalogPtr xmlLoadACatalog (const char *filename); +xmlCatalogPtr xmlLoadSGMLSuperCatalog (const char *filename); +int xmlConvertSGMLCatalog (xmlCatalogPtr catal); +int xmlACatalogAdd (xmlCatalogPtr catal, + const xmlChar *type, + const xmlChar *orig, + const xmlChar *replace); +int xmlACatalogRemove (xmlCatalogPtr catal, + const xmlChar *value); +xmlChar * xmlACatalogResolve (xmlCatalogPtr catal, + const xmlChar *pubID, + const xmlChar *sysID); +xmlChar * xmlACatalogResolveSystem(xmlCatalogPtr catal, + const xmlChar *sysID); +xmlChar * xmlACatalogResolvePublic(xmlCatalogPtr catal, + const xmlChar *pubID); +xmlChar * xmlACatalogResolveURI (xmlCatalogPtr catal, + const xmlChar *URI); +void xmlACatalogDump (xmlCatalogPtr catal, + FILE *out); +void xmlFreeCatalog (xmlCatalogPtr catal); +int xmlCatalogIsEmpty (xmlCatalogPtr catal); + +/* + * Global operations. + */ +void xmlInitializeCatalog (void); +int xmlLoadCatalog (const char *filename); +void xmlLoadCatalogs (const char *paths); +void xmlCatalogCleanup (void); +void xmlCatalogDump (FILE *out); +xmlChar * xmlCatalogResolve (const xmlChar *pubID, + const xmlChar *sysID); +xmlChar * xmlCatalogResolveSystem (const xmlChar *sysID); +xmlChar * xmlCatalogResolvePublic (const xmlChar *pubID); +xmlChar * xmlCatalogResolveURI (const xmlChar *URI); +int xmlCatalogAdd (const xmlChar *type, + const xmlChar *orig, + const xmlChar *replace); +int xmlCatalogRemove (const xmlChar *value); +xmlDocPtr xmlParseCatalogFile (const char *filename); +int xmlCatalogConvert (void); + +/* + * Strictly minimal interfaces for per-document catalogs used + * by the parser. + */ +void xmlCatalogFreeLocal (void *catalogs); +void * xmlCatalogAddLocal (void *catalogs, + const xmlChar *URL); +xmlChar * xmlCatalogLocalResolve (void *catalogs, + const xmlChar *pubID, + const xmlChar *sysID); +xmlChar * xmlCatalogLocalResolveURI(void *catalogs, + const xmlChar *URI); +/* + * Preference settings. 
+ */ +int xmlCatalogSetDebug (int level); +xmlCatalogPrefer xmlCatalogSetDefaultPrefer(xmlCatalogPrefer prefer); +void xmlCatalogSetDefaults (xmlCatalogAllow allow); +xmlCatalogAllow xmlCatalogGetDefaults (void); + + +/* DEPRECATED interfaces */ +const xmlChar * xmlCatalogGetSystem (const xmlChar *sysID); +const xmlChar * xmlCatalogGetPublic (const xmlChar *pubID); + +#ifdef __cplusplus +} +#endif +#endif /* LIBXML_CATALOG_ENABLED */ +#endif /* __XML_CATALOG_H__ */ diff --git a/bundle/libxml/include/libxml/debugXML.h b/bundle/libxml/include/libxml/debugXML.h new file mode 100644 index 0000000000..cf017a4a73 --- /dev/null +++ b/bundle/libxml/include/libxml/debugXML.h @@ -0,0 +1,163 @@ +/* + * debugXML.h : Interfaces to a set of routines used for debugging the tree + * produced by the XML parser. + * + * Daniel Veillard <daniel@veillard.com> + */ + +#ifndef __DEBUG_XML__ +#define __DEBUG_XML__ +#include <stdio.h> +#include <libxml/tree.h> + +#ifdef LIBXML_DEBUG_ENABLED + +#include <libxml/xpath.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The standard Dump routines. 
+ */ +void xmlDebugDumpString (FILE *output, + const xmlChar *str); +void xmlDebugDumpAttr (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpAttrList (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpOneNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNodeList (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpDocumentHead(FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDocument (FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDTD (FILE *output, + xmlDtdPtr dtd); +void xmlDebugDumpEntities (FILE *output, + xmlDocPtr doc); + +void xmlLsOneNode (FILE *output, xmlNodePtr node); +int xmlLsCountNode (xmlNodePtr node); + +LIBXML_DLL_IMPORT const char *xmlBoolToText (int boolval); + +/**************************************************************** + * * + * The XML shell related structures and functions * + * * + ****************************************************************/ + +/** + * xmlShellReadlineFunc: + * @prompt: a string prompt + * + * This is a generic signature for the XML shell input function. + * + * Returns a string which will be freed by the Shell. + */ +typedef char * (* xmlShellReadlineFunc)(char *prompt); + +/** + * xmlShellCtxt: + * + * A debugging shell context. + * TODO: add the defined function tables. + */ +typedef struct _xmlShellCtxt xmlShellCtxt; +typedef xmlShellCtxt *xmlShellCtxtPtr; +struct _xmlShellCtxt { + char *filename; + xmlDocPtr doc; + xmlNodePtr node; + xmlXPathContextPtr pctxt; + int loaded; + FILE *output; + xmlShellReadlineFunc input; +}; + +/** + * xmlShellCmd: + * @ctxt: a shell context + * @arg: a string argument + * @node: a first node + * @node2: a second node + * + * This is a generic signature for the XML shell functions. + * + * Returns an int, negative returns indicating errors. 
+ */ +typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); + +void xmlShellPrintXPathError (int errorType, + const char *arg); +void xmlShellPrintNode (xmlNodePtr node); +void xmlShellPrintXPathResult(xmlXPathObjectPtr list); +int xmlShellList (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellBase (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellDir (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellCat (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellLoad (xmlShellCtxtPtr ctxt, + char *filename, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellWrite (xmlShellCtxtPtr ctxt, + char *filename, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellSave (xmlShellCtxtPtr ctxt, + char *filename, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellValidate (xmlShellCtxtPtr ctxt, + char *dtd, + xmlNodePtr node, + xmlNodePtr node2); +int xmlShellDu (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr tree, + xmlNodePtr node2); +int xmlShellPwd (xmlShellCtxtPtr ctxt, + char *buffer, + xmlNodePtr node, + xmlNodePtr node2); + +/* + * The Shell interface. + */ +void xmlShell (xmlDocPtr doc, + char *filename, + xmlShellReadlineFunc input, + FILE *output); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_DEBUG_ENABLED */ +#endif /* __DEBUG_XML__ */ diff --git a/bundle/libxml/include/libxml/encoding.h b/bundle/libxml/include/libxml/encoding.h new file mode 100644 index 0000000000..3c0fbb91ff --- /dev/null +++ b/bundle/libxml/include/libxml/encoding.h @@ -0,0 +1,230 @@ +/* + * encoding.h : interface for the encoding conversion functions needed for + * XML + * + * Related specs: + * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. 
+ * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_CHAR_ENCODING_H__ +#define __XML_CHAR_ENCODING_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_ICONV_ENABLED +#include <iconv.h> +#endif +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlCharEncoding: + * + * Predefined values for some standard encodings. + * Libxml don't do beforehand translation on UTF8, ISOLatinX. + * It also support UTF16 (LE and BE) by default. + * + * Anything else would have to be translated to UTF8 before being + * given to the parser itself. The BOM for UTF16 and the encoding + * declaration are looked at and a converter is looked for at that + * point. If not found the parser stops here as asked by the XML REC + * Converter can be registered by the user using xmlRegisterCharEncodingHandler + * but the current form doesn't allow stateful transcoding (a serious + * problem agreed !). If iconv has been found it will be used + * automatically and allow stateful transcoding, the simplest is then + * to be sure to enable icon and to provide iconv libs for the encoding + * support needed. + */ +typedef enum { + XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ + XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ + XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ + XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ + XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ + XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ + XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ + XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! 
*/ + XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ + XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ + XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ + XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ + XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ + XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ + XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ + XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ + XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ + XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ + XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ + XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ + XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ + XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ +} xmlCharEncoding; + +/** + * xmlCharEncodingInputFunc: + * @out: a pointer to an array of bytes to store the UTF-8 result + * @outlen: the length of @out + * @in: a pointer to an array of chars in the original encoding + * @inlen: the length of @in + * + * Take a block of chars in the original encoding and try to convert + * it to an UTF-8 block of chars out. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. + */ +typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen, + const unsigned char *in, int *inlen); + + +/** + * xmlCharEncodingOutputFunc: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an other + * encoding. + * Note: a first call designed to produce heading info is called with + * in = NULL.
If stateful this should also initialize the encoder state. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. + */ +typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, + const unsigned char *in, int *inlen); + + +/* + * Block defining the handlers for non UTF-8 encodings. + * If iconv is supported, there is two extra fields. + */ + +typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; +typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; +struct _xmlCharEncodingHandler { + char *name; + xmlCharEncodingInputFunc input; + xmlCharEncodingOutputFunc output; +#ifdef LIBXML_ICONV_ENABLED + iconv_t iconv_in; + iconv_t iconv_out; +#endif /* LIBXML_ICONV_ENABLED */ +}; + +#ifdef __cplusplus +} +#endif +#include <libxml/tree.h> +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Interfaces for encoding handlers. + */ +void xmlInitCharEncodingHandlers (void); +void xmlCleanupCharEncodingHandlers (void); +void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); +xmlCharEncodingHandlerPtr + xmlGetCharEncodingHandler (xmlCharEncoding enc); +xmlCharEncodingHandlerPtr + xmlFindCharEncodingHandler (const char *name); +xmlCharEncodingHandlerPtr + xmlNewCharEncodingHandler (const char *name, + xmlCharEncodingInputFunc input, + xmlCharEncodingOutputFunc output); + +/* + * Interfaces for encoding names and aliases.
+ */ +int xmlAddEncodingAlias (const char *name, + const char *alias); +int xmlDelEncodingAlias (const char *alias); +const char * + xmlGetEncodingAlias (const char *alias); +void xmlCleanupEncodingAliases (void); +xmlCharEncoding + xmlParseCharEncoding (const char *name); +const char * + xmlGetCharEncodingName (xmlCharEncoding enc); + +/* + * Interfaces directly used by the parsers. + */ +xmlCharEncoding + xmlDetectCharEncoding (const unsigned char *in, + int len); + +int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); + +int xmlCharEncInFunc (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); +int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); +int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); + +/* + * Export a few useful functions + */ +int UTF8Toisolat1 (unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen); +int isolat1ToUTF8 (unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen); +int xmlGetUTF8Char (const unsigned char *utf, + int *len); +/* + * exports additional "UTF-8 aware" string routines which are. 
+ */ + +int xmlCheckUTF8 (const unsigned char *utf); + +int xmlUTF8Strsize (const xmlChar *utf, + int len); +xmlChar * xmlUTF8Strndup (const xmlChar *utf, + int len); +xmlChar * xmlUTF8Strpos (const xmlChar *utf, + int pos); +int xmlUTF8Strloc (const xmlChar *utf, + const xmlChar *utfchar); +xmlChar * xmlUTF8Strsub (const xmlChar *utf, + int start, + int len); + +int xmlUTF8Strlen (const xmlChar *utf); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_CHAR_ENCODING_H__ */ diff --git a/bundle/libxml/include/libxml/entities.h b/bundle/libxml/include/libxml/entities.h new file mode 100644 index 0000000000..480009c0e3 --- /dev/null +++ b/bundle/libxml/include/libxml/entities.h @@ -0,0 +1,109 @@ +/* + * entities.h : interface for the XML entities handling + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_ENTITIES_H__ +#define __XML_ENTITIES_H__ + +#include <libxml/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The different valid entity types. + */ +typedef enum { + XML_INTERNAL_GENERAL_ENTITY = 1, + XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2, + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3, + XML_INTERNAL_PARAMETER_ENTITY = 4, + XML_EXTERNAL_PARAMETER_ENTITY = 5, + XML_INTERNAL_PREDEFINED_ENTITY = 6 +} xmlEntityType; + +/* + * An unit of storage for an entity, contains the string, the value + * and the linkind data needed for the linking in the hash table. + */ + +struct _xmlEntity { + void *_private; /* application data */ + xmlElementType type; /* XML_ENTITY_DECL, must be second ! 
*/ + const xmlChar *name; /* Entity name */ + struct _xmlNode *children; /* First child link */ + struct _xmlNode *last; /* Last child link */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + xmlChar *orig; /* content without ref substitution */ + xmlChar *content; /* content or ndata if unparsed */ + int length; /* the content length */ + xmlEntityType etype; /* The entity type */ + const xmlChar *ExternalID; /* External identifier for PUBLIC */ + const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + + struct _xmlEntity *nexte; /* unused */ + const xmlChar *URI; /* the full URI as computed */ +}; + +/* + * All entities are stored in a hash table. + * There are 2 separate hash tables for global and parameter entities. + */ + +typedef struct _xmlHashTable xmlEntitiesTable; +typedef xmlEntitiesTable *xmlEntitiesTablePtr; + +/* + * External functions: + */ + +void xmlInitializePredefinedEntities (void); +xmlEntityPtr xmlAddDocEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +xmlEntityPtr xmlAddDtdEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +xmlEntityPtr xmlGetPredefinedEntity (const xmlChar *name); +xmlEntityPtr xmlGetDocEntity (xmlDocPtr doc, + const xmlChar *name); +xmlEntityPtr xmlGetDtdEntity (xmlDocPtr doc, + const xmlChar *name); +xmlEntityPtr xmlGetParameterEntity (xmlDocPtr doc, + const xmlChar *name); +const xmlChar * xmlEncodeEntities (xmlDocPtr doc, + const xmlChar *input); +xmlChar * xmlEncodeEntitiesReentrant(xmlDocPtr doc, + const xmlChar *input); +xmlChar * xmlEncodeSpecialChars (xmlDocPtr doc, + const xmlChar *input); +xmlEntitiesTablePtr xmlCreateEntitiesTable (void); +xmlEntitiesTablePtr 
xmlCopyEntitiesTable (xmlEntitiesTablePtr table); +void xmlFreeEntitiesTable (xmlEntitiesTablePtr table); +void xmlDumpEntitiesTable (xmlBufferPtr buf, + xmlEntitiesTablePtr table); +void xmlDumpEntityDecl (xmlBufferPtr buf, + xmlEntityPtr ent); +void xmlCleanupPredefinedEntities(void); + + +#ifdef __cplusplus +} +#endif + +# endif /* __XML_ENTITIES_H__ */ diff --git a/bundle/libxml/include/libxml/globals.h b/bundle/libxml/include/libxml/globals.h new file mode 100644 index 0000000000..fc6c01e53c --- /dev/null +++ b/bundle/libxml/include/libxml/globals.h @@ -0,0 +1,336 @@ +/* + * globals.h: interface for all global variables of the library + * + * The bottom of this file is automatically generated by build_glob.py + * based on the description file global.data + * + * See Copyright for the status of this software. + * + * Gary Pennington <Gary.Pennington@uk.sun.com> + * daniel@veillard.com + */ + +#ifndef __XML_GLOBALS_H +#define __XML_GLOBALS_H + +#include <libxml/parser.h> +#include <libxml/xmlerror.h> +#include <libxml/SAX.h> +#include <libxml/xmlmemory.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Externally global symbols which need to be protected for backwards + * compatibility support. 
+ */ + +#undef docbDefaultSAXHandler +#undef htmlDefaultSAXHandler +#undef oldXMLWDcompatibility +#undef xmlBufferAllocScheme +#undef xmlDefaultBufferSize +#undef xmlDefaultSAXHandler +#undef xmlDefaultSAXLocator +#undef xmlDoValidityCheckingDefaultValue +#undef xmlFree +#undef xmlGenericError +#undef xmlGenericErrorContext +#undef xmlGetWarningsDefaultValue +#undef xmlIndentTreeOutput +#undef xmlTreeIndentString +#undef xmlKeepBlanksDefaultValue +#undef xmlLineNumbersDefaultValue +#undef xmlLoadExtDtdDefaultValue +#undef xmlMalloc +#undef xmlMemStrdup +#undef xmlParserDebugEntities +#undef xmlParserVersion +#undef xmlPedanticParserDefaultValue +#undef xmlRealloc +#undef xmlSaveNoEmptyTags +#undef xmlSubstituteEntitiesDefaultValue + +typedef struct _xmlGlobalState xmlGlobalState; +typedef xmlGlobalState *xmlGlobalStatePtr; +struct _xmlGlobalState +{ + const char *xmlParserVersion; + + xmlSAXLocator xmlDefaultSAXLocator; + xmlSAXHandler xmlDefaultSAXHandler; + xmlSAXHandler docbDefaultSAXHandler; + xmlSAXHandler htmlDefaultSAXHandler; + + xmlFreeFunc xmlFree; + xmlMallocFunc xmlMalloc; + xmlStrdupFunc xmlMemStrdup; + xmlReallocFunc xmlRealloc; + + xmlGenericErrorFunc xmlGenericError; + void *xmlGenericErrorContext; + + int oldXMLWDcompatibility; + + xmlBufferAllocationScheme xmlBufferAllocScheme; + int xmlDefaultBufferSize; + + int xmlSubstituteEntitiesDefaultValue; + int xmlDoValidityCheckingDefaultValue; + int xmlGetWarningsDefaultValue; + int xmlKeepBlanksDefaultValue; + int xmlLineNumbersDefaultValue; + int xmlLoadExtDtdDefaultValue; + int xmlParserDebugEntities; + int xmlPedanticParserDefaultValue; + + int xmlSaveNoEmptyTags; + int xmlIndentTreeOutput; + const char *xmlTreeIndentString; +}; + +#ifdef __cplusplus +} +#endif +#include <libxml/threads.h> +#ifdef __cplusplus +extern "C" { +#endif + +void xmlInitializeGlobalState(xmlGlobalStatePtr gs); + +/* + * In general the memory allocation entry points are not kept + * thread specific but this can be overridden 
by LIBXML_THREAD_ALLOC_ENABLED + * - xmlMalloc + * - xmlRealloc + * - xmlMemStrdup + * - xmlFree + */ + +#ifdef LIBXML_THREAD_ALLOC_ENABLED +#ifdef LIBXML_THREAD_ENABLED +extern xmlMallocFunc *__xmlMalloc(void); +#define xmlMalloc \ +(*(__xmlMalloc())) +#else +LIBXML_DLL_IMPORT extern xmlMallocFunc xmlMalloc; +#endif + +#ifdef LIBXML_THREAD_ENABLED +extern xmlReallocFunc *__xmlRealloc(void); +#define xmlRealloc \ +(*(__xmlRealloc())) +#else +LIBXML_DLL_IMPORT extern xmlReallocFunc xmlRealloc; +#endif + +#ifdef LIBXML_THREAD_ENABLED +extern xmlFreeFunc *__xmlFree(void); +#define xmlFree \ +(*(__xmlFree())) +#else +LIBXML_DLL_IMPORT extern xmlFreeFunc xmlFree; +#endif + +#ifdef LIBXML_THREAD_ENABLED +extern xmlStrdupFunc *__xmlMemStrdup(void); +#define xmlMemStrdup \ +(*(__xmlMemStrdup())) +#else +LIBXML_DLL_IMPORT extern xmlStrdupFunc xmlMemStrdup; +#endif +#else /* !LIBXML_THREAD_ALLOC_ENABLED */ +LIBXML_DLL_IMPORT extern xmlMallocFunc xmlMalloc; +LIBXML_DLL_IMPORT extern xmlReallocFunc xmlRealloc; +LIBXML_DLL_IMPORT extern xmlFreeFunc xmlFree; +LIBXML_DLL_IMPORT extern xmlStrdupFunc xmlMemStrdup; +#endif /* LIBXML_THREAD_ALLOC_ENABLED */ + +#ifdef LIBXML_DOCB_ENABLED +extern xmlSAXHandler *__docbDefaultSAXHandler(void); +#ifdef LIBXML_THREAD_ENABLED +#define docbDefaultSAXHandler \ +(*(__docbDefaultSAXHandler())) +#else +LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler; +#endif +#endif + +#ifdef LIBXML_HTML_ENABLED +extern xmlSAXHandler *__htmlDefaultSAXHandler(void); +#ifdef LIBXML_THREAD_ENABLED +#define htmlDefaultSAXHandler \ +(*(__htmlDefaultSAXHandler())) +#else +LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler; +#endif +#endif + + +/* + * Everything starting from the line below is + * Automatically generated by build_glob.py. + * Do not modify the previous line. 
+ */ + + +extern int *__oldXMLWDcompatibility(void); +#ifdef LIBXML_THREAD_ENABLED +#define oldXMLWDcompatibility \ +(*(__oldXMLWDcompatibility())) +#else +LIBXML_DLL_IMPORT extern int oldXMLWDcompatibility; +#endif + +extern xmlBufferAllocationScheme *__xmlBufferAllocScheme(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlBufferAllocScheme \ +(*(__xmlBufferAllocScheme())) +#else +LIBXML_DLL_IMPORT extern xmlBufferAllocationScheme xmlBufferAllocScheme; +#endif + +extern int *__xmlDefaultBufferSize(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlDefaultBufferSize \ +(*(__xmlDefaultBufferSize())) +#else +LIBXML_DLL_IMPORT extern int xmlDefaultBufferSize; +#endif + +extern xmlSAXHandler *__xmlDefaultSAXHandler(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlDefaultSAXHandler \ +(*(__xmlDefaultSAXHandler())) +#else +LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler; +#endif + +extern xmlSAXLocator *__xmlDefaultSAXLocator(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlDefaultSAXLocator \ +(*(__xmlDefaultSAXLocator())) +#else +LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator; +#endif + +extern int *__xmlDoValidityCheckingDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlDoValidityCheckingDefaultValue \ +(*(__xmlDoValidityCheckingDefaultValue())) +#else +LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue; +#endif + +extern xmlGenericErrorFunc *__xmlGenericError(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlGenericError \ +(*(__xmlGenericError())) +#else +LIBXML_DLL_IMPORT extern xmlGenericErrorFunc xmlGenericError; +#endif + +extern void * *__xmlGenericErrorContext(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlGenericErrorContext \ +(*(__xmlGenericErrorContext())) +#else +LIBXML_DLL_IMPORT extern void * xmlGenericErrorContext; +#endif + +extern int *__xmlGetWarningsDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlGetWarningsDefaultValue \ +(*(__xmlGetWarningsDefaultValue())) +#else +LIBXML_DLL_IMPORT 
extern int xmlGetWarningsDefaultValue; +#endif + +extern int *__xmlIndentTreeOutput(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlIndentTreeOutput \ +(*(__xmlIndentTreeOutput())) +#else +LIBXML_DLL_IMPORT extern int xmlIndentTreeOutput; +#endif + +extern const char * *__xmlTreeIndentString(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlTreeIndentString \ +(*(__xmlTreeIndentString())) +#else +LIBXML_DLL_IMPORT extern const char * xmlTreeIndentString; +#endif + +extern int *__xmlKeepBlanksDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlKeepBlanksDefaultValue \ +(*(__xmlKeepBlanksDefaultValue())) +#else +LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue; +#endif + +extern int *__xmlLineNumbersDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlLineNumbersDefaultValue \ +(*(__xmlLineNumbersDefaultValue())) +#else +LIBXML_DLL_IMPORT extern int xmlLineNumbersDefaultValue; +#endif + +extern int *__xmlLoadExtDtdDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlLoadExtDtdDefaultValue \ +(*(__xmlLoadExtDtdDefaultValue())) +#else +LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue; +#endif + +extern int *__xmlParserDebugEntities(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlParserDebugEntities \ +(*(__xmlParserDebugEntities())) +#else +LIBXML_DLL_IMPORT extern int xmlParserDebugEntities; +#endif + +extern const char * *__xmlParserVersion(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlParserVersion \ +(*(__xmlParserVersion())) +#else +LIBXML_DLL_IMPORT extern const char * xmlParserVersion; +#endif + +extern int *__xmlPedanticParserDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlPedanticParserDefaultValue \ +(*(__xmlPedanticParserDefaultValue())) +#else +LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue; +#endif + +extern int *__xmlSaveNoEmptyTags(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlSaveNoEmptyTags \ +(*(__xmlSaveNoEmptyTags())) +#else +LIBXML_DLL_IMPORT extern int xmlSaveNoEmptyTags; 
+#endif + +extern int *__xmlSubstituteEntitiesDefaultValue(void); +#ifdef LIBXML_THREAD_ENABLED +#define xmlSubstituteEntitiesDefaultValue \ +(*(__xmlSubstituteEntitiesDefaultValue())) +#else +LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue; +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_GLOBALS_H */ diff --git a/bundle/libxml/include/libxml/hash.h b/bundle/libxml/include/libxml/hash.h new file mode 100644 index 0000000000..ec590c91fe --- /dev/null +++ b/bundle/libxml/include/libxml/hash.h @@ -0,0 +1,166 @@ +/* + * hash.h: chained hash tables + * + * Copyright (C) 2000 Bjorn Reese and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + * Author: bjorn.reese@systematic.dk + */ + +#ifndef __XML_HASH_H__ +#define __XML_HASH_H__ + +#include <libxml/parser.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The hash table. + */ +typedef struct _xmlHashTable xmlHashTable; +typedef xmlHashTable *xmlHashTablePtr; + +/* + * function types: + */ +/** + * xmlHashDeallocator: + * @payload: the data in the hash + * @name: the name associated + * + * Callback to free data from a hash. + */ +typedef void (*xmlHashDeallocator)(void *payload, xmlChar *name); +/** + * xmlHashCopier: + * @payload: the data in the hash + * @name: the name associated + * + * Callback to copy data from a hash. + * + * Returns a copy of the data or NULL in case of error. 
+ */ +typedef void *(*xmlHashCopier)(void *payload, xmlChar *name); +/** + * xmlHashScanner: + * @payload: the data in the hash + * @data: extra scanner data + * @name: the name associated + * + * Callback when scanning data in a hash with the simple scanner. + */ +typedef void (*xmlHashScanner)(void *payload, void *data, xmlChar *name); +/** + * xmlHashScannerFull: + * @payload: the data in the hash + * @data: extra scanner data + * @name: the name associated + * @name2: the second name associated + * @name3: the third name associated + * + * Callback when scanning data in a hash with the full scanner. + */ +typedef void (*xmlHashScannerFull)(void *payload, void *data, + const xmlChar *name, const xmlChar *name2, + const xmlChar *name3); + +/* + * Constructor and destructor. + */ +xmlHashTablePtr xmlHashCreate (int size); +void xmlHashFree (xmlHashTablePtr table, + xmlHashDeallocator f); + +/* + * Add a new entry to the hash table. + */ +int xmlHashAddEntry (xmlHashTablePtr table, + const xmlChar *name, + void *userdata); +int xmlHashUpdateEntry(xmlHashTablePtr table, + const xmlChar *name, + void *userdata, + xmlHashDeallocator f); +int xmlHashAddEntry2(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + void *userdata); +int xmlHashUpdateEntry2(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + void *userdata, + xmlHashDeallocator f); +int xmlHashAddEntry3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata); +int xmlHashUpdateEntry3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata, + xmlHashDeallocator f); + +/* + * Remove an entry from the hash table. 
+ */ +int xmlHashRemoveEntry(xmlHashTablePtr table, const xmlChar *name, + xmlHashDeallocator f); +int xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, xmlHashDeallocator f); +int xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashDeallocator f); + +/* + * Retrieve the userdata. + */ +void * xmlHashLookup (xmlHashTablePtr table, + const xmlChar *name); +void * xmlHashLookup2 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2); +void * xmlHashLookup3 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3); + +/* + * Helpers. + */ +xmlHashTablePtr xmlHashCopy (xmlHashTablePtr table, + xmlHashCopier f); +int xmlHashSize (xmlHashTablePtr table); +void xmlHashScan (xmlHashTablePtr table, + xmlHashScanner f, + void *data); +void xmlHashScan3 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashScanner f, + void *data); +void xmlHashScanFull (xmlHashTablePtr table, + xmlHashScannerFull f, + void *data); +void xmlHashScanFull3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashScannerFull f, + void *data); +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_HASH_H__ */ diff --git a/bundle/libxml/include/libxml/list.h b/bundle/libxml/include/libxml/list.h new file mode 100644 index 0000000000..8c9515fe1d --- /dev/null +++ b/bundle/libxml/include/libxml/list.h @@ -0,0 +1,116 @@ +/* + * list.h: lists interfaces + * + * Copyright (C) 2000 Gary Pennington and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + * Author: Gary.Pennington@uk.sun.com + */ + +#ifndef __XML_LINK_INCLUDE__ +#define __XML_LINK_INCLUDE__ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlLink xmlLink; +typedef xmlLink *xmlLinkPtr; + +typedef struct _xmlList xmlList; +typedef xmlList *xmlListPtr; + +/** + * xmlListDeallocator: + * @lk: the data to deallocate + * + * Callback function used to free data from a list. + */ +typedef void (*xmlListDeallocator) (xmlLinkPtr lk); +/** + * xmlListDataCompare: + * @data0: the first data + * @data1: the second data + * + * Callback function used to compare 2 data. + * + * Returns 0 is equality, -1 or 1 otherwise depending on the ordering. + */ +typedef int (*xmlListDataCompare) (const void *data0, const void *data1); +/** + * xmlListWalker: + * @data: the data found in the list + * @user: extra user provided data to the walker + * + * Callback function used when walking a list with xmlListWalk(). + * + * Returns 0 to stop walking the list, 1 otherwise. 
+ */ +typedef int (*xmlListWalker) (const void *data, const void *user); + +/* Creation/Deletion */ +xmlListPtr xmlListCreate (xmlListDeallocator deallocator, + xmlListDataCompare compare); +void xmlListDelete (xmlListPtr l); + +/* Basic Operators */ +void * xmlListSearch (xmlListPtr l, + void *data); +void * xmlListReverseSearch (xmlListPtr l, + void *data); +int xmlListInsert (xmlListPtr l, + void *data) ; +int xmlListAppend (xmlListPtr l, + void *data) ; +int xmlListRemoveFirst (xmlListPtr l, + void *data); +int xmlListRemoveLast (xmlListPtr l, + void *data); +int xmlListRemoveAll (xmlListPtr l, + void *data); +void xmlListClear (xmlListPtr l); +int xmlListEmpty (xmlListPtr l); +xmlLinkPtr xmlListFront (xmlListPtr l); +xmlLinkPtr xmlListEnd (xmlListPtr l); +int xmlListSize (xmlListPtr l); + +void xmlListPopFront (xmlListPtr l); +void xmlListPopBack (xmlListPtr l); +int xmlListPushFront (xmlListPtr l, + void *data); +int xmlListPushBack (xmlListPtr l, + void *data); + +/* Advanced Operators */ +void xmlListReverse (xmlListPtr l); +void xmlListSort (xmlListPtr l); +void xmlListWalk (xmlListPtr l, + xmlListWalker walker, + const void *user); +void xmlListReverseWalk (xmlListPtr l, + xmlListWalker walker, + const void *user); +void xmlListMerge (xmlListPtr l1, + xmlListPtr l2); +xmlListPtr xmlListDup (const xmlListPtr old); +int xmlListCopy (xmlListPtr cur, + const xmlListPtr old); +/* Link operators */ +void * xmlLinkGetData (xmlLinkPtr lk); + +/* xmlListUnique() */ +/* xmlListSwap */ + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_LINK_INCLUDE__ */ diff --git a/bundle/libxml/include/libxml/nanoftp.h b/bundle/libxml/include/libxml/nanoftp.h new file mode 100644 index 0000000000..a0ba2ceeb3 --- /dev/null +++ b/bundle/libxml/include/libxml/nanoftp.h @@ -0,0 +1,117 @@ +/* + * nanoftp.h: minimalist FTP implementation to fetch external subsets. + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#ifndef __NANO_FTP_H__ +#define __NANO_FTP_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_FTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ftpListCallback: + * @userData: user provided data for the callback + * @filename: the file name (including "->" when links are shown) + * @attrib: the attribute string + * @owner: the owner string + * @group: the group string + * @size: the file size + * @links: the link count + * @year: the year + * @month: the month + * @day: the day + * @hour: the hour + * @minute: the minute + * + * A callback for the xmlNanoFTPList command. + * Note that only one of year and day:minute are specified. + */ +typedef void (*ftpListCallback) (void *userData, + const char *filename, const char *attrib, + const char *owner, const char *group, + unsigned long size, int links, int year, + const char *month, int day, int hour, + int minute); +/** + * ftpDataCallback: + * @userData: the user provided context + * @data: the data received + * @len: its size in bytes + * + * A callback for the xmlNanoFTPGet command. + */ +typedef void (*ftpDataCallback) (void *userData, + const char *data, + int len); + +/* + * Init + */ +void xmlNanoFTPInit (void); +void xmlNanoFTPCleanup (void); + +/* + * Creating/freeing contexts. + */ +void * xmlNanoFTPNewCtxt (const char *URL); +void xmlNanoFTPFreeCtxt (void * ctx); +void * xmlNanoFTPConnectTo (const char *server, + int port); +/* + * Opening/closing session connections. + */ +void * xmlNanoFTPOpen (const char *URL); +int xmlNanoFTPConnect (void *ctx); +int xmlNanoFTPClose (void *ctx); +int xmlNanoFTPQuit (void *ctx); +void xmlNanoFTPScanProxy (const char *URL); +void xmlNanoFTPProxy (const char *host, + int port, + const char *user, + const char *passwd, + int type); +int xmlNanoFTPUpdateURL (void *ctx, + const char *URL); + +/* + * Rather internal commands. 
+ */ +int xmlNanoFTPGetResponse (void *ctx); +int xmlNanoFTPCheckResponse (void *ctx); + +/* + * CD/DIR/GET handlers. + */ +int xmlNanoFTPCwd (void *ctx, + char *directory); + +int xmlNanoFTPGetConnection (void *ctx); +int xmlNanoFTPCloseConnection(void *ctx); +int xmlNanoFTPList (void *ctx, + ftpListCallback callback, + void *userData, + char *filename); +int xmlNanoFTPGetSocket (void *ctx, + const char *filename); +int xmlNanoFTPGet (void *ctx, + ftpDataCallback callback, + void *userData, + const char *filename); +int xmlNanoFTPRead (void *ctx, + void *dest, + int len); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* LIBXML_FTP_ENABLED */ +#endif /* __NANO_FTP_H__ */ diff --git a/bundle/libxml/include/libxml/nanohttp.h b/bundle/libxml/include/libxml/nanohttp.h new file mode 100644 index 0000000000..4fb4e1d256 --- /dev/null +++ b/bundle/libxml/include/libxml/nanohttp.h @@ -0,0 +1,56 @@ +/* + * nanohttp.h: minimalist HTTP implementation to fetch external subsets. + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#ifndef __NANO_HTTP_H__ +#define __NANO_HTTP_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_HTTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif +void xmlNanoHTTPInit (void); +void xmlNanoHTTPCleanup (void); +void xmlNanoHTTPScanProxy (const char *URL); +int xmlNanoHTTPFetch (const char *URL, + const char *filename, + char **contentType); +void * xmlNanoHTTPMethod (const char *URL, + const char *method, + const char *input, + char **contentType, + const char *headers, + int ilen); +void * xmlNanoHTTPMethodRedir (const char *URL, + const char *method, + const char *input, + char **contentType, + char **redir, + const char *headers, + int ilen); +void * xmlNanoHTTPOpen (const char *URL, + char **contentType); +void * xmlNanoHTTPOpenRedir (const char *URL, + char **contentType, + char **redir); +int xmlNanoHTTPReturnCode (void *ctx); +const char * xmlNanoHTTPAuthHeader(void *ctx); +int xmlNanoHTTPRead (void *ctx, + void *dest, + int len); +int xmlNanoHTTPSave (void *ctxt, + const char *filename); +void xmlNanoHTTPClose (void *ctx); +#ifdef __cplusplus +} + +#endif /* __cplusplus */ +#endif /* LIBXML_HTTP_ENABLED */ +#endif /* __NANO_HTTP_H__ */ diff --git a/bundle/libxml/include/libxml/parser.h b/bundle/libxml/include/libxml/parser.h new file mode 100644 index 0000000000..e6725a364d --- /dev/null +++ b/bundle/libxml/include/libxml/parser.h @@ -0,0 +1,869 @@ +/* + * parser.h : Interfaces, constants and types related to the XML parser. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_PARSER_H__ +#define __XML_PARSER_H__ + +#include <libxml/tree.h> +#include <libxml/valid.h> +#include <libxml/entities.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XML_DEFAULT_VERSION: + * + * The default version of XML used: 1.0 + */ +#define XML_DEFAULT_VERSION "1.0" + +/** + * xmlParserInput: + * + * An xmlParserInput is an input flow for the XML processor. 
+ * Each entity parsed is associated with an xmlParserInput (except the + * few predefined ones). This is the case both for internal entities + * - in which case the flow is already completely in memory - or + * external entities - in which case we use the buf structure for + * progressive reading and I18N conversions to the internal UTF-8 format. + */ + +/** + * xmlParserInputDeallocate: + * @str: the string to deallocate + * + * Callback for freeing some parser input allocations. + */ +typedef void (* xmlParserInputDeallocate)(xmlChar *str); + +struct _xmlParserInput { + /* Input buffer */ + xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ + + const char *filename; /* The file analyzed, if any */ + const char *directory; /* the directory/base of the file */ + const xmlChar *base; /* Base of the array to parse */ + const xmlChar *cur; /* Current char being parsed */ + const xmlChar *end; /* end of the array to parse */ + int length; /* length if known */ + int line; /* Current line */ + int col; /* Current column */ + int consumed; /* How many xmlChars already consumed */ + xmlParserInputDeallocate free; /* function to deallocate the base */ + const xmlChar *encoding; /* the encoding string for entity */ + const xmlChar *version; /* the version string for entity */ + int standalone; /* Was that entity marked standalone */ +}; + +/** + * xmlParserNodeInfo: + * + * The parser can be asked to collect Node information, i.e. at what + * place in the file they were detected. + * NOTE: This is off by default and not very well tested. 
+ */ +typedef struct _xmlParserNodeInfo xmlParserNodeInfo; +typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; + +struct _xmlParserNodeInfo { + const struct _xmlNode* node; + /* Position & line # that text that created the node begins & ends on */ + unsigned long begin_pos; + unsigned long begin_line; + unsigned long end_pos; + unsigned long end_line; +}; + +typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; +typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; +struct _xmlParserNodeInfoSeq { + unsigned long maximum; + unsigned long length; + xmlParserNodeInfo* buffer; +}; + +/** + * xmlParserInputState: + * + * The parser is now working also as a state based parser. + * The recursive one use the state info for entities processing. + */ +typedef enum { + XML_PARSER_EOF = -1, /* nothing is to be parsed */ + XML_PARSER_START = 0, /* nothing has been parsed */ + XML_PARSER_MISC, /* Misc* before int subset */ + XML_PARSER_PI, /* Within a processing instruction */ + XML_PARSER_DTD, /* within some DTD content */ + XML_PARSER_PROLOG, /* Misc* after internal subset */ + XML_PARSER_COMMENT, /* within a comment */ + XML_PARSER_START_TAG, /* within a start tag */ + XML_PARSER_CONTENT, /* within the content */ + XML_PARSER_CDATA_SECTION, /* within a CDATA section */ + XML_PARSER_END_TAG, /* within a closing tag */ + XML_PARSER_ENTITY_DECL, /* within an entity declaration */ + XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ + XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ + XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ + XML_PARSER_EPILOG, /* the Misc* after the last end tag */ + XML_PARSER_IGNORE, /* within an IGNORED section */ + XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ +} xmlParserInputState; + +/** + * XML_DETECT_IDS: + * + * Bit in the loadsubset context field to tell to do ID/REFs lookups. + * Use it to initialize xmlLoadExtDtdDefaultValue. 
+ */ +#define XML_DETECT_IDS 2 + +/** + * XML_COMPLETE_ATTRS: + * + * Bit in the loadsubset context field to tell to complete the + * elements attributes lists with the ones defaulted from the DTDs. + * Use it to initialize xmlLoadExtDtdDefaultValue. + */ +#define XML_COMPLETE_ATTRS 4 + +/** + * xmlParserCtxt: + * + * The parser context. + * NOTE This doesn't completely define the parser state, the (current ?) + * design of the parser uses recursive function calls since this allows + * an easy mapping from the production rules of the specification + * to the actual code. The drawback is that the actual function call + * also reflects the parser state. However most of the parsing routines + * take as the only argument the parser context pointer, so migrating + * to a state based parser for progressive parsing shouldn't be too hard. + */ +struct _xmlParserCtxt { + struct _xmlSAXHandler *sax; /* The SAX handler */ + void *userData; /* For SAX interface only, used by DOM build */ + xmlDocPtr myDoc; /* the document being built */ + int wellFormed; /* is the document well formed */ + int replaceEntities; /* shall we replace entities ? 
*/ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* the declared encoding, if any */ + int standalone; /* standalone document */ + int html; /* an HTML(1)/Docbook(2) document */ + + /* Input stream stack */ + xmlParserInputPtr input; /* Current input stream */ + int inputNr; /* Number of current input streams */ + int inputMax; /* Max number of input streams */ + xmlParserInputPtr *inputTab; /* stack of inputs */ + + /* Node analysis stack only used for DOM building */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + int record_info; /* Whether node info should be kept */ + xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ + + int errNo; /* error code */ + + int hasExternalSubset; /* reference and external subset */ + int hasPErefs; /* the internal subset has PE refs */ + int external; /* are we parsing an external entity */ + + int valid; /* is the document valid */ + int validate; /* shall we try to validate ? */ + xmlValidCtxt vctxt; /* The validity context */ + + xmlParserInputState instate; /* current type of input */ + int token; /* next char look-ahead */ + + char *directory; /* the data directory */ + + /* Node name stack */ + xmlChar *name; /* Current parsed Node */ + int nameNr; /* Depth of the parsing stack */ + int nameMax; /* Max depth of the parsing stack */ + xmlChar * *nameTab; /* array of nodes */ + + long nbChars; /* number of xmlChar processed */ + long checkIndex; /* used by progressive parsing lookup */ + int keepBlanks; /* ugly but ... 
*/ + int disableSAX; /* SAX callbacks are disabled */ + int inSubset; /* Parsing is in int 1/ext 2 subset */ + xmlChar * intSubName; /* name of subset */ + xmlChar * extSubURI; /* URI of external subset */ + xmlChar * extSubSystem; /* SYSTEM ID of external subset */ + + /* xml:space values */ + int * space; /* Should the parser preserve spaces */ + int spaceNr; /* Depth of the parsing stack */ + int spaceMax; /* Max depth of the parsing stack */ + int * spaceTab; /* array of space infos */ + + int depth; /* to prevent entity substitution loops */ + xmlParserInputPtr entity; /* used to check entities boundaries */ + int charset; /* encoding of the in-memory content + actually an xmlCharEncoding */ + int nodelen; /* Those two fields are there to */ + int nodemem; /* Speed up large node parsing */ + int pedantic; /* signal pedantic warnings */ + void *_private; /* For user data, libxml won't touch it */ + + int loadsubset; /* should the external subset be loaded */ + int linenumbers; /* set line number in element content */ + void *catalogs; /* document's own catalog */ + int recovery; /* run in recovery mode */ +}; + +/** + * xmlSAXLocator: + * + * A SAX Locator. + */ +struct _xmlSAXLocator { + const xmlChar *(*getPublicId)(void *ctx); + const xmlChar *(*getSystemId)(void *ctx); + int (*getLineNumber)(void *ctx); + int (*getColumnNumber)(void *ctx); +}; + +/** + * xmlSAXHandler: + * + * A SAX handler is bunch of callbacks called by the parser when processing + * of the input generate data or structure informations. 
+ */ + +/** + * resolveEntitySAXFunc: + * @ctx: the user data (XML parser context) + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * Callback: + * The entity loader, to control the loading of external entities, + * the application can either: + * - override this resolveEntity() callback in the SAX block + * - or better use the xmlSetExternalEntityLoader() function to + * set up it's own entity resolution routine + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. + */ +typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, + const xmlChar *publicId, + const xmlChar *systemId); +/** + * internalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on internal subset declaration. + */ +typedef void (*internalSubsetSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/** + * externalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on external subset declaration. + */ +typedef void (*externalSubsetSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/** + * getEntitySAXFunc: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get an entity by name. + * + * Returns the xmlEntityPtr if found. + */ +typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, + const xmlChar *name); +/** + * getParameterEntitySAXFunc: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get a parameter entity by name. + * + * Returns the xmlEntityPtr if found. 
+ */ +typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, + const xmlChar *name); +/** + * entityDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the entity name + * @type: the entity type + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @content: the entity value (without processing). + * + * An entity definition has been parsed. + */ +typedef void (*entityDeclSAXFunc) (void *ctx, + const xmlChar *name, + int type, + const xmlChar *publicId, + const xmlChar *systemId, + xmlChar *content); +/** + * notationDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The name of the notation + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * What to do when a notation declaration has been parsed. + */ +typedef void (*notationDeclSAXFunc)(void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId); +/** + * attributeDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @elem: the name of the element + * @fullname: the attribute name + * @type: the attribute type + * @def: the type of default value + * @defaultValue: the attribute default value + * @tree: the tree of enumerated value set + * + * An attribute definition has been parsed. + */ +typedef void (*attributeDeclSAXFunc)(void *ctx, + const xmlChar *elem, + const xmlChar *fullname, + int type, + int def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +/** + * elementDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the element name + * @type: the element type + * @content: the element value tree + * + * An element definition has been parsed. 
+ */ +typedef void (*elementDeclSAXFunc)(void *ctx, + const xmlChar *name, + int type, + xmlElementContentPtr content); +/** + * unparsedEntityDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The name of the entity + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @notationName: the name of the notation + * + * What to do when an unparsed entity declaration is parsed. + */ +typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName); +/** + * setDocumentLocatorSAXFunc: + * @ctx: the user data (XML parser context) + * @loc: A SAX Locator + * + * Receive the document locator at startup, actually xmlDefaultSAXLocator. + * Everything is available on the context, so this is useless in our case. + */ +typedef void (*setDocumentLocatorSAXFunc) (void *ctx, + xmlSAXLocatorPtr loc); +/** + * startDocumentSAXFunc: + * @ctx: the user data (XML parser context) + * + * Called when the document start being processed. + */ +typedef void (*startDocumentSAXFunc) (void *ctx); +/** + * endDocumentSAXFunc: + * @ctx: the user data (XML parser context) + * + * Called when the document end has been detected. + */ +typedef void (*endDocumentSAXFunc) (void *ctx); +/** + * startElementSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The element name, including namespace prefix + * @atts: An array of name/value attributes pairs, NULL terminated + * + * Called when an opening tag has been processed. + */ +typedef void (*startElementSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar **atts); +/** + * endElementSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The element name + * + * Called when the end of an element has been detected. 
+ */ +typedef void (*endElementSAXFunc) (void *ctx, + const xmlChar *name); +/** + * attributeSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The attribute name, including namespace prefix + * @value: The attribute value + * + * Handle an attribute that has been read by the parser. + * The default handling is to convert the attribute into an + * DOM subtree and past it in a new xmlAttr element added to + * the element. + */ +typedef void (*attributeSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar *value); +/** + * referenceSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Called when an entity reference is detected. + */ +typedef void (*referenceSAXFunc) (void *ctx, + const xmlChar *name); +/** + * charactersSAXFunc: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * Receiving some chars from the parser. + */ +typedef void (*charactersSAXFunc) (void *ctx, + const xmlChar *ch, + int len); +/** + * ignorableWhitespaceSAXFunc: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * Receiving some ignorable whitespaces from the parser. + * UNUSED: by default the DOM building will use characters. + */ +typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, + const xmlChar *ch, + int len); +/** + * processingInstructionSAXFunc: + * @ctx: the user data (XML parser context) + * @target: the target name + * @data: the PI data's + * + * A processing instruction has been parsed. + */ +typedef void (*processingInstructionSAXFunc) (void *ctx, + const xmlChar *target, + const xmlChar *data); +/** + * commentSAXFunc: + * @ctx: the user data (XML parser context) + * @value: the comment content + * + * A comment has been parsed. 
+ */ +typedef void (*commentSAXFunc) (void *ctx, + const xmlChar *value); +/** + * cdataBlockSAXFunc: + * @ctx: the user data (XML parser context) + * @value: The pcdata content + * @len: the block length + * + * Called when a pcdata block has been parsed. + */ +typedef void (*cdataBlockSAXFunc) ( + void *ctx, + const xmlChar *value, + int len); +/** + * warningSAXFunc: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a warning messages, callback. + */ +typedef void (*warningSAXFunc) (void *ctx, + const char *msg, ...); +/** + * errorSAXFunc: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format an error messages, callback. + */ +typedef void (*errorSAXFunc) (void *ctx, + const char *msg, ...); +/** + * fatalErrorSAXFunc: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format fatal error messages, callback. + * Note: so far fatalError() SAX callbacks are not used, error() + * get all the callbacks for errors. + */ +typedef void (*fatalErrorSAXFunc) (void *ctx, + const char *msg, ...); +/** + * isStandaloneSAXFunc: + * @ctx: the user data (XML parser context) + * + * Is this document tagged standalone? + * + * Returns 1 if true + */ +typedef int (*isStandaloneSAXFunc) (void *ctx); +/** + * hasInternalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * + * Does this document has an internal subset. + * + * Returns 1 if true + */ +typedef int (*hasInternalSubsetSAXFunc) (void *ctx); +/** + * hasExternalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * + * Does this document has an external subset? 
+ * + * Returns 1 if true + */ +typedef int (*hasExternalSubsetSAXFunc) (void *ctx); + +struct _xmlSAXHandler { + internalSubsetSAXFunc internalSubset; + isStandaloneSAXFunc isStandalone; + hasInternalSubsetSAXFunc hasInternalSubset; + hasExternalSubsetSAXFunc hasExternalSubset; + resolveEntitySAXFunc resolveEntity; + getEntitySAXFunc getEntity; + entityDeclSAXFunc entityDecl; + notationDeclSAXFunc notationDecl; + attributeDeclSAXFunc attributeDecl; + elementDeclSAXFunc elementDecl; + unparsedEntityDeclSAXFunc unparsedEntityDecl; + setDocumentLocatorSAXFunc setDocumentLocator; + startDocumentSAXFunc startDocument; + endDocumentSAXFunc endDocument; + startElementSAXFunc startElement; + endElementSAXFunc endElement; + referenceSAXFunc reference; + charactersSAXFunc characters; + ignorableWhitespaceSAXFunc ignorableWhitespace; + processingInstructionSAXFunc processingInstruction; + commentSAXFunc comment; + warningSAXFunc warning; + errorSAXFunc error; + fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ + getParameterEntitySAXFunc getParameterEntity; + cdataBlockSAXFunc cdataBlock; + externalSubsetSAXFunc externalSubset; + int initialized; +}; + +/** + * xmlExternalEntityLoader: + * @URL: The System ID of the resource requested + * @ID: The Public ID of the resource requested + * @context: the XML parser context + * + * External entity loaders types. + * + * Returns the entity input parser. + */ +typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, + const char *ID, + xmlParserCtxtPtr context); + +/* + * Global variables: just the default SAX interface tables and XML + * version infos. 
+ */ +#if 0 +LIBXML_DLL_IMPORT extern const char *xmlParserVersion; +#endif + +/* +LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator; +LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler; +LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler; +LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler; + */ + +/* + * Entity substitution default behavior. + */ + +#if 0 +LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue; +LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue; +#endif + +#ifdef __cplusplus +} +#endif +#include <libxml/encoding.h> +#include <libxml/xmlIO.h> +#include <libxml/globals.h> +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Init/Cleanup + */ +void xmlInitParser (void); +void xmlCleanupParser (void); + +/* + * Input functions + */ +int xmlParserInputRead (xmlParserInputPtr in, + int len); +int xmlParserInputGrow (xmlParserInputPtr in, + int len); + +/* + * xmlChar handling + */ +xmlChar * xmlStrdup (const xmlChar *cur); +xmlChar * xmlStrndup (const xmlChar *cur, + int len); +xmlChar * xmlCharStrndup (const char *cur, + int len); +xmlChar * xmlCharStrdup (const char *cur); +xmlChar * xmlStrsub (const xmlChar *str, + int start, + int len); +const xmlChar * xmlStrchr (const xmlChar *str, + xmlChar val); +const xmlChar * xmlStrstr (const xmlChar *str, + const xmlChar *val); +const xmlChar * xmlStrcasestr (const xmlChar *str, + xmlChar *val); +int xmlStrcmp (const xmlChar *str1, + const xmlChar *str2); +int xmlStrncmp (const xmlChar *str1, + const xmlChar *str2, + int len); +int xmlStrcasecmp (const xmlChar *str1, + const xmlChar *str2); +int xmlStrncasecmp (const xmlChar *str1, + const xmlChar *str2, + int len); +int xmlStrEqual (const xmlChar *str1, + const xmlChar *str2); +int xmlStrlen (const xmlChar *str); +xmlChar * xmlStrcat (xmlChar *cur, + const xmlChar *add); +xmlChar * xmlStrncat (xmlChar *cur, + const xmlChar *add, + int len); + +/* + * Basic parsing Interfaces + */ +xmlDocPtr xmlParseDoc 
(xmlChar *cur); +xmlDocPtr xmlParseMemory (const char *buffer, + int size); +xmlDocPtr xmlParseFile (const char *filename); +int xmlSubstituteEntitiesDefault(int val); +int xmlKeepBlanksDefault (int val); +void xmlStopParser (xmlParserCtxtPtr ctxt); +int xmlPedanticParserDefault(int val); +int xmlLineNumbersDefault (int val); + +/* + * Recovery mode + */ +xmlDocPtr xmlRecoverDoc (xmlChar *cur); +xmlDocPtr xmlRecoverMemory (const char *buffer, + int size); +xmlDocPtr xmlRecoverFile (const char *filename); + +/* + * Less common routines and SAX interfaces + */ +int xmlParseDocument (xmlParserCtxtPtr ctxt); +int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); +xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax, + xmlChar *cur, + int recovery); +int xmlSAXUserParseFile (xmlSAXHandlerPtr sax, + void *user_data, + const char *filename); +int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, + void *user_data, + const char *buffer, + int size); +xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax, + const char *buffer, + int size, + int recovery); +xmlDocPtr xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, + const char *buffer, + int size, + int recovery, + void *data); +xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax, + const char *filename, + int recovery); +xmlDocPtr xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, + const char *filename, + int recovery, + void *data); +xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax, + const char *filename); +xmlDocPtr xmlParseEntity (const char *filename); +xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax, + xmlParserInputBufferPtr input, + xmlCharEncoding enc); +int xmlParseBalancedChunkMemory(xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *string, + xmlNodePtr *lst); +int xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, + 
xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *string, + xmlNodePtr *lst, + int recover); +int xmlParseExternalEntity (xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *URL, + const xmlChar *ID, + xmlNodePtr *lst); +int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, + const xmlChar *URL, + const xmlChar *ID, + xmlNodePtr *lst); + +/* + * Parser contexts handling. + */ +void xmlInitParserCtxt (xmlParserCtxtPtr ctxt); +void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); +void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); +void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, + const xmlChar* buffer, + const char *filename); +xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); + +/* + * Reading/setting optional parsing features. + */ + +int xmlGetFeaturesList (int *len, + const char **result); +int xmlGetFeature (xmlParserCtxtPtr ctxt, + const char *name, + void *result); +int xmlSetFeature (xmlParserCtxtPtr ctxt, + const char *name, + void *value); + +/* + * Interfaces for the Push mode. + */ +xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename); +int xmlParseChunk (xmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); + +/* + * Special I/O mode. + */ + +xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, + void *user_data, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); + +xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt, + xmlParserInputBufferPtr input, + xmlCharEncoding enc); + +/* + * Node infos. 
+ */ +const xmlParserNodeInfo* + xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, + const xmlNodePtr node); +void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); +void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); +unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, + const xmlNodePtr node); +void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, + const xmlParserNodeInfoPtr info); + +/* + * External entities handling actually implemented in xmlIO. + */ + +void xmlSetExternalEntityLoader(xmlExternalEntityLoader f); +xmlExternalEntityLoader + xmlGetExternalEntityLoader(void); +xmlParserInputPtr + xmlLoadExternalEntity (const char *URL, + const char *ID, + xmlParserCtxtPtr ctxt); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_PARSER_H__ */ + diff --git a/bundle/libxml/include/libxml/parserInternals.h b/bundle/libxml/include/libxml/parserInternals.h new file mode 100644 index 0000000000..8507442be8 --- /dev/null +++ b/bundle/libxml/include/libxml/parserInternals.h @@ -0,0 +1,413 @@ +/* + * parserInternals.h : internals routines exported by the parser. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + * + */ + +#ifndef __XML_PARSER_INTERNALS_H__ +#define __XML_PARSER_INTERNALS_H__ + +#include <libxml/parser.h> +#include <libxml/HTMLparser.h> + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * XML_MAX_NAMELEN: + * + * Identifiers can be longer, but this will be more costly + * at runtime. + */ +#define XML_MAX_NAMELEN 100 + +/** + * INPUT_CHUNK: + * + * The parser tries to always have that amount of input ready. + * One of the point is providing context when reporting errors. + */ +#define INPUT_CHUNK 250 + +/************************************************************************ + * * + * UNICODE version of the macros. 
* + * * + ************************************************************************/ +/** + * IS_CHAR: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] + * | [#x10000-#x10FFFF] + * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + */ +#define IS_CHAR(c) \ + ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \ + ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \ + (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \ + (((c) >= 0x10000) && ((c) <= 0x10FFFF))) + +/** + * IS_BLANK: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [3] S ::= (#x20 | #x9 | #xD | #xA)+ + */ +#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \ + ((c) == 0x0D)) + +/** + * IS_BASECHAR: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [85] BaseChar ::= ... long list see REC ... + */ +#define IS_BASECHAR(c) xmlIsBaseChar(c) + +/** + * IS_DIGIT: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [88] Digit ::= ... long list see REC ... + */ +#define IS_DIGIT(c) xmlIsDigit(c) + +/** + * IS_COMBINING: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [87] CombiningChar ::= ... long list see REC ... 
+ */ +#define IS_COMBINING(c) xmlIsCombining(c) + +/** + * IS_EXTENDER: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | + * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | + * [#x309D-#x309E] | [#x30FC-#x30FE] + */ +#define IS_EXTENDER(c) xmlIsExtender(c) + +/** + * IS_IDEOGRAPHIC: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] + */ +#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c) + +/** + * IS_LETTER: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [84] Letter ::= BaseChar | Ideographic + */ +#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) + + +/** + * IS_PUBIDCHAR: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + */ +#define IS_PUBIDCHAR(c) xmlIsPubidChar(c) + +/** + * SKIP_EOL: + * @p: and UTF8 string pointer + * + * Skips the end of line chars. + */ +#define SKIP_EOL(p) \ + if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \ + if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } + +/** + * MOVETO_ENDTAG: + * @p: and UTF8 string pointer + * + * Skips to the next '>' char. + */ +#define MOVETO_ENDTAG(p) \ + while ((*p) && (*(p) != '>')) (p)++ + +/** + * MOVETO_STARTTAG: + * @p: and UTF8 string pointer + * + * Skips to the next '<' char. + */ +#define MOVETO_STARTTAG(p) \ + while ((*p) && (*(p) != '<')) (p)++ + +/** + * Global variables used for predefined strings. + */ +LIBXML_DLL_IMPORT extern const xmlChar xmlStringText[]; +LIBXML_DLL_IMPORT extern const xmlChar xmlStringTextNoenc[]; +LIBXML_DLL_IMPORT extern const xmlChar xmlStringComment[]; + +/* + * Function to finish the work of the macros where needed. 
+ */ +int xmlIsBaseChar (int c); +int xmlIsBlank (int c); +int xmlIsPubidChar (int c); +int xmlIsLetter (int c); +int xmlIsDigit (int c); +int xmlIsIdeographic(int c); +int xmlIsExtender (int c); +int xmlIsCombining (int c); +int xmlIsChar (int c); + +/** + * Parser context. + */ +xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename); +xmlParserCtxtPtr xmlCreateMemoryParserCtxt(const char *buffer, + int size); +xmlParserCtxtPtr xmlNewParserCtxt (void); +xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL, + const xmlChar *ID, + const xmlChar *base); +int xmlSwitchEncoding (xmlParserCtxtPtr ctxt, + xmlCharEncoding enc); +int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, + xmlCharEncodingHandlerPtr handler); + +/** + * Entities + */ +void xmlHandleEntity (xmlParserCtxtPtr ctxt, + xmlEntityPtr entity); + +/** + * Input Streams. + */ +xmlParserInputPtr xmlNewStringInputStream (xmlParserCtxtPtr ctxt, + const xmlChar *buffer); +xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, + xmlEntityPtr entity); +void xmlPushInput (xmlParserCtxtPtr ctxt, + xmlParserInputPtr input); +xmlChar xmlPopInput (xmlParserCtxtPtr ctxt); +void xmlFreeInputStream (xmlParserInputPtr input); +xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt, + const char *filename); +xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt); + +/** + * Namespaces. + */ +xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt, + const xmlChar *name, + xmlChar **prefix); +xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); +xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, + xmlChar **prefix); +xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt); +void xmlParseNamespace (xmlParserCtxtPtr ctxt); + +/** + * Generic production rules. 
+ */ +xmlChar * xmlScanName (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseName (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt, + xmlChar **orig); +xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); +xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); +void xmlParseCharData (xmlParserCtxtPtr ctxt, + int cdata); +xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt, + xmlChar **publicID, + int strict); +void xmlParseComment (xmlParserCtxtPtr ctxt); +xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt); +void xmlParsePI (xmlParserCtxtPtr ctxt); +void xmlParseNotationDecl (xmlParserCtxtPtr ctxt); +void xmlParseEntityDecl (xmlParserCtxtPtr ctxt); +int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, + xmlChar **value); +xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt); +xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt); +int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, + xmlEnumerationPtr *tree); +int xmlParseAttributeType (xmlParserCtxtPtr ctxt, + xmlEnumerationPtr *tree); +void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); +xmlElementContentPtr xmlParseElementMixedContentDecl + (xmlParserCtxtPtr ctxt, + xmlParserInputPtr inputchk); +xmlElementContentPtr xmlParseElementChildrenContentDecl + (xmlParserCtxtPtr ctxt, + xmlParserInputPtr inputchk); +int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, + xmlChar *name, + xmlElementContentPtr *result); +int xmlParseElementDecl (xmlParserCtxtPtr ctxt); +void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); +int xmlParseCharRef (xmlParserCtxtPtr ctxt); +xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt); +void xmlParseReference (xmlParserCtxtPtr ctxt); +void xmlParsePEReference (xmlParserCtxtPtr ctxt); +void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt, + xmlChar **value); +xmlChar * xmlParseStartTag 
(xmlParserCtxtPtr ctxt); +void xmlParseEndTag (xmlParserCtxtPtr ctxt); +void xmlParseCDSect (xmlParserCtxtPtr ctxt); +void xmlParseContent (xmlParserCtxtPtr ctxt); +void xmlParseElement (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); +int xmlParseSDDecl (xmlParserCtxtPtr ctxt); +void xmlParseXMLDecl (xmlParserCtxtPtr ctxt); +void xmlParseTextDecl (xmlParserCtxtPtr ctxt); +void xmlParseMisc (xmlParserCtxtPtr ctxt); +void xmlParseExternalSubset (xmlParserCtxtPtr ctxt, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/** + * XML_SUBSTITUTE_NONE: + * + * If no entities need to be substituted. + */ +#define XML_SUBSTITUTE_NONE 0 +/** + * XML_SUBSTITUTE_REF: + * + * Whether general entities need to be substituted. + */ +#define XML_SUBSTITUTE_REF 1 +/** + * XML_SUBSTITUTE_PEREF: + * + * Whether parameter entities need to be substituted. + */ +#define XML_SUBSTITUTE_PEREF 2 +/** + * XML_SUBSTITUTE_BOTH: + * + * Both general and parameter entities need to be substituted. + */ +#define XML_SUBSTITUTE_BOTH 3 + +xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt, + int len, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); +xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, + const xmlChar *str, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); + +/* + * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP. + */ +int nodePush (xmlParserCtxtPtr ctxt, + xmlNodePtr value); +xmlNodePtr nodePop (xmlParserCtxtPtr ctxt); +int inputPush (xmlParserCtxtPtr ctxt, + xmlParserInputPtr value); +xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt); +xmlChar *namePop (xmlParserCtxtPtr ctxt); +int namePush (xmlParserCtxtPtr ctxt, + xmlChar *value); + +/* + * other commodities shared between parser.c and parserInternals. 
+ */ +int xmlSkipBlankChars (xmlParserCtxtPtr ctxt); +int xmlStringCurrentChar (xmlParserCtxtPtr ctxt, + const xmlChar *cur, + int *len); +void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); +void xmlParserHandleReference(xmlParserCtxtPtr ctxt); +int xmlCheckLanguageID (const xmlChar *lang); + +/* + * Really core function shared with HTML parser. + */ +int xmlCurrentChar (xmlParserCtxtPtr ctxt, + int *len); +int xmlCopyCharMultiByte (xmlChar *out, + int val); +int xmlCopyChar (int len, + xmlChar *out, + int val); +void xmlNextChar (xmlParserCtxtPtr ctxt); +void xmlParserInputShrink (xmlParserInputPtr in); + +#ifdef LIBXML_HTML_ENABLED +/* + * Actually comes from the HTML parser but launched from the init stuff. + */ +void htmlInitAutoClose (void); +htmlParserCtxtPtr htmlCreateFileParserCtxt(const char *filename, + const char *encoding); +#endif + +/* + * Specific function to keep track of entities references + * and used by the XSLT debugger. + */ +/** + * xmlEntityReferenceFunc: + * @ent: the entity + * @firstNode: the fist node in the chunk + * @lastNode: the last nod in the chunk + * + * Callback function used when one needs to be able to track back the + * provenance of a chunk of nodes inherited from an entity replacement. + */ +typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent, + xmlNodePtr firstNode, + xmlNodePtr lastNode); + +void xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func); + + +#ifdef __cplusplus +} +#endif +#endif /* __XML_PARSER_INTERNALS_H__ */ diff --git a/bundle/libxml/include/libxml/schemasInternals.h b/bundle/libxml/include/libxml/schemasInternals.h new file mode 100644 index 0000000000..29ad862145 --- /dev/null +++ b/bundle/libxml/include/libxml/schemasInternals.h @@ -0,0 +1,353 @@ +/* + * schemasInternals.h : internal interfaces for the XML Schemas handling + * and schema validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_SCHEMA_INTERNALS_H__ +#define __XML_SCHEMA_INTERNALS_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <libxml/xmlregexp.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * XML Schemas defines multiple type of types. + */ +typedef enum { + XML_SCHEMA_TYPE_BASIC = 1, + XML_SCHEMA_TYPE_ANY, + XML_SCHEMA_TYPE_FACET, + XML_SCHEMA_TYPE_SIMPLE, + XML_SCHEMA_TYPE_COMPLEX, + XML_SCHEMA_TYPE_SEQUENCE, + XML_SCHEMA_TYPE_CHOICE, + XML_SCHEMA_TYPE_ALL, + XML_SCHEMA_TYPE_SIMPLE_CONTENT, + XML_SCHEMA_TYPE_COMPLEX_CONTENT, + XML_SCHEMA_TYPE_UR, + XML_SCHEMA_TYPE_RESTRICTION, + XML_SCHEMA_TYPE_EXTENSION, + XML_SCHEMA_TYPE_ELEMENT, + XML_SCHEMA_TYPE_ATTRIBUTE, + XML_SCHEMA_TYPE_ATTRIBUTEGROUP, + XML_SCHEMA_TYPE_GROUP, + XML_SCHEMA_TYPE_NOTATION, + XML_SCHEMA_TYPE_LIST, + XML_SCHEMA_TYPE_UNION, + XML_SCHEMA_FACET_MININCLUSIVE = 1000, + XML_SCHEMA_FACET_MINEXCLUSIVE, + XML_SCHEMA_FACET_MAXINCLUSIVE, + XML_SCHEMA_FACET_MAXEXCLUSIVE, + XML_SCHEMA_FACET_TOTALDIGITS, + XML_SCHEMA_FACET_FRACTIONDIGITS, + XML_SCHEMA_FACET_PATTERN, + XML_SCHEMA_FACET_ENUMERATION, + XML_SCHEMA_FACET_WHITESPACE, + XML_SCHEMA_FACET_LENGTH, + XML_SCHEMA_FACET_MAXLENGTH, + XML_SCHEMA_FACET_MINLENGTH +} xmlSchemaTypeType; + +typedef enum { + XML_SCHEMA_CONTENT_UNKNOWN = 0, + XML_SCHEMA_CONTENT_EMPTY = 1, + XML_SCHEMA_CONTENT_ELEMENTS, + XML_SCHEMA_CONTENT_MIXED, + XML_SCHEMA_CONTENT_SIMPLE, + XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS, + XML_SCHEMA_CONTENT_BASIC, + XML_SCHEMA_CONTENT_ANY +} xmlSchemaContentType; + +typedef struct _xmlSchemaVal xmlSchemaVal; +typedef xmlSchemaVal *xmlSchemaValPtr; + +typedef struct _xmlSchemaType xmlSchemaType; +typedef xmlSchemaType *xmlSchemaTypePtr; + +typedef struct _xmlSchemaFacet xmlSchemaFacet; +typedef xmlSchemaFacet *xmlSchemaFacetPtr; + +/** + * Annotation + */ +typedef struct _xmlSchemaAnnot xmlSchemaAnnot; +typedef xmlSchemaAnnot *xmlSchemaAnnotPtr; +struct _xmlSchemaAnnot { + 
struct _xmlSchemaAnnot *next; + xmlNodePtr content; /* the annotation */ +}; + +/** + * An attribute definition. + */ + +#define XML_SCHEMAS_ANYATTR_SKIP 1 +#define XML_SCHEMAS_ANYATTR_LAX 2 +#define XML_SCHEMAS_ANYATTR_STRICT 3 + +typedef struct _xmlSchemaAttribute xmlSchemaAttribute; +typedef xmlSchemaAttribute *xmlSchemaAttributePtr; +struct _xmlSchemaAttribute { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlChar *typeName; + xmlChar *typeNs; + xmlSchemaAnnotPtr annot; + + xmlSchemaTypePtr base; + int occurs; + xmlChar *defValue; + xmlSchemaTypePtr subtypes; +}; + +/** + * An attribute group definition. + * + * xmlSchemaAttribute and xmlSchemaAttributeGroup start of structures + * must be kept similar + */ +typedef struct _xmlSchemaAttributeGroup xmlSchemaAttributeGroup; +typedef xmlSchemaAttributeGroup *xmlSchemaAttributeGroupPtr; +struct _xmlSchemaAttributeGroup { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlSchemaAnnotPtr annot; + + xmlSchemaAttributePtr attributes; +}; + + +/** + * XML_SCHEMAS_TYPE_MIXED: + * + * the element content type is mixed + */ +#define XML_SCHEMAS_TYPE_MIXED 1 << 0 + +/** + * _xmlSchemaType: + * + * Schemas type definition. + */ +struct _xmlSchemaType { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaType *next;/* the next type if in a sequence ... 
*/ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlSchemaAnnotPtr annot; + xmlSchemaTypePtr subtypes; + xmlSchemaAttributePtr attributes; + xmlNodePtr node; + int minOccurs; + int maxOccurs; + + int flags; + xmlSchemaContentType contentType; + xmlChar *base; + xmlChar *baseNs; + xmlSchemaTypePtr baseType; + xmlSchemaFacetPtr facets; +}; + +/** + * xmlSchemaElement: + * An element definition. + * + * xmlSchemaType, xmlSchemaFacet and xmlSchemaElement start of + * structures must be kept similar + */ +/** + * XML_SCHEMAS_ELEM_NILLABLE: + * + * the element is nillable + */ +#define XML_SCHEMAS_ELEM_NILLABLE 1 << 0 +/** + * XML_SCHEMAS_ELEM_GLOBAL: + * + * the element is global + */ +#define XML_SCHEMAS_ELEM_GLOBAL 1 << 1 +/** + * XML_SCHEMAS_ELEM_DEFAULT: + * + * the element has a default value + */ +#define XML_SCHEMAS_ELEM_DEFAULT 1 << 2 +/** + * XML_SCHEMAS_ELEM_FIXED: + * + * the element has a fixed value + */ +#define XML_SCHEMAS_ELEM_FIXED 1 << 3 +/** + * XML_SCHEMAS_ELEM_ABSTRACT: + * + * the element is abstract + */ +#define XML_SCHEMAS_ELEM_ABSTRACT 1 << 4 +/** + * XML_SCHEMAS_ELEM_TOPLEVEL: + * + * the element is top level + */ +#define XML_SCHEMAS_ELEM_TOPLEVEL 1 << 5 +/** + * XML_SCHEMAS_ELEM_REF: + * + * the element is a reference to a type + */ +#define XML_SCHEMAS_ELEM_REF 1 << 6 + +typedef struct _xmlSchemaElement xmlSchemaElement; +typedef xmlSchemaElement *xmlSchemaElementPtr; +struct _xmlSchemaElement { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaType *next;/* the next type if in a sequence ... 
*/ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlSchemaAnnotPtr annot; + xmlSchemaTypePtr subtypes; + xmlSchemaAttributePtr attributes; + xmlNodePtr node; + int minOccurs; + int maxOccurs; + + int flags; + xmlChar *targetNamespace; + xmlChar *namedType; + xmlChar *namedTypeNs; + xmlChar *substGroup; + xmlChar *substGroupNs; + xmlChar *scope; + xmlChar *value; + struct _xmlSchemaElement *refDecl; + xmlRegexpPtr contModel; + xmlSchemaContentType contentType; +}; + +/* + * XML_SCHEMAS_FACET_UNKNOWN: + * + * unknown facet handling + */ +#define XML_SCHEMAS_FACET_UNKNOWN 0 +/* + * XML_SCHEMAS_FACET_PRESERVE: + * + * preserve the type of the facet + */ +#define XML_SCHEMAS_FACET_PRESERVE 1 +/* + * XML_SCHEMAS_FACET_REPLACE: + * + * replace the type of the facet + */ +#define XML_SCHEMAS_FACET_REPLACE 2 +/* + * XML_SCHEMAS_FACET_COLLAPSE: + * + * collapse the types of the facet + */ +#define XML_SCHEMAS_FACET_COLLAPSE 3 + +/** + * A facet definition. + */ +struct _xmlSchemaFacet { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaFacet *next;/* the next type if in a sequence ... */ + xmlChar *value; + xmlChar *id; + xmlSchemaAnnotPtr annot; + xmlNodePtr node; + int fixed; + int whitespace; + xmlSchemaValPtr val; + xmlRegexpPtr regexp; +}; + +/** + * A notation definition. 
+ */ +typedef struct _xmlSchemaNotation xmlSchemaNotation; +typedef xmlSchemaNotation *xmlSchemaNotationPtr; +struct _xmlSchemaNotation { + xmlSchemaTypeType type; /* The kind of type */ + xmlChar *name; + xmlSchemaAnnotPtr annot; + xmlChar *identifier; +}; + +/** + * XML_SCHEMAS_QUALIF_ELEM: + * + * the shemas requires qualified elements + */ +#define XML_SCHEMAS_QUALIF_ELEM 1 << 0 +/** + * XML_SCHEMAS_QUALIF_ATTR: + * + * the shemas requires qualified attributes + */ +#define XML_SCHEMAS_QUALIF_ATTR 1 << 1 +/** + * _xmlSchema: + * + * A Schemas definition + */ +struct _xmlSchema { + xmlChar *name; /* schema name */ + xmlChar *targetNamespace; /* the target namespace */ + xmlChar *version; + xmlChar *id; + xmlDocPtr doc; + xmlSchemaAnnotPtr annot; + int flags; + + xmlHashTablePtr typeDecl; + xmlHashTablePtr attrDecl; + xmlHashTablePtr attrgrpDecl; + xmlHashTablePtr elemDecl; + xmlHashTablePtr notaDecl; + + xmlHashTablePtr schemasImports; + + void *_private; /* unused by the library for users or bindings */ +}; + +void xmlSchemaFreeType (xmlSchemaTypePtr type); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_INTERNALS_H__ */ + + diff --git a/bundle/libxml/include/libxml/threads.h b/bundle/libxml/include/libxml/threads.h new file mode 100644 index 0000000000..afca78ffe5 --- /dev/null +++ b/bundle/libxml/include/libxml/threads.h @@ -0,0 +1,62 @@ +/** + * threads.c: set of generic threading related routines + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_THREADS_H__ +#define __XML_THREADS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * xmlMutex are a simple mutual exception locks. + */ +typedef struct _xmlMutex xmlMutex; +typedef xmlMutex *xmlMutexPtr; + +/* + * xmlRMutex are reentrant mutual exception locks. 
+ */
+typedef struct _xmlRMutex xmlRMutex;
+typedef xmlRMutex *xmlRMutexPtr;
+
+#ifdef __cplusplus
+}
+#endif
+#include <libxml/globals.h> /* included with the extern "C" block closed above and reopened below */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+xmlMutexPtr xmlNewMutex (void);
+void xmlMutexLock (xmlMutexPtr tok);
+void xmlMutexUnlock (xmlMutexPtr tok);
+void xmlFreeMutex (xmlMutexPtr tok);
+
+xmlRMutexPtr xmlNewRMutex (void);
+void xmlRMutexLock (xmlRMutexPtr tok);
+void xmlRMutexUnlock (xmlRMutexPtr tok);
+void xmlFreeRMutex (xmlRMutexPtr tok);
+
+/*
+ * Library wide APIs.
+ */
+void xmlInitThreads (void);
+void xmlLockLibrary (void);
+void xmlUnlockLibrary(void);
+int xmlGetThreadId (void);
+int xmlIsMainThread (void);
+void xmlCleanupThreads(void);
+xmlGlobalStatePtr xmlGetGlobalState(void); /* xmlGlobalStatePtr is not declared in this header; presumably from <libxml/globals.h> - confirm */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* __XML_THREADS_H__ */
diff --git a/bundle/libxml/include/libxml/tree.h b/bundle/libxml/include/libxml/tree.h
new file mode 100644
index 0000000000..93d57e3126
--- /dev/null
+++ b/bundle/libxml/include/libxml/tree.h
@@ -0,0 +1,894 @@
+/*
+ * tree.h : describes the structures found in a tree resulting
+ * from an XML parsing.
+ *
+ * See Copyright for the status of this software.
+ *
+ * daniel@veillard.com
+ *
+ */
+
+#ifndef __XML_TREE_H__
+#define __XML_TREE_H__
+
+#include <stdio.h> /* FILE appears in the dump prototypes of this header */
+#include <libxml/xmlversion.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Forward declarations of some basic structures and their pointer types:
+ */
+/* xmlIO.h (the full definitions presumably live there - confirm) */
+typedef struct _xmlParserInputBuffer xmlParserInputBuffer;
+typedef xmlParserInputBuffer *xmlParserInputBufferPtr;
+
+typedef struct _xmlOutputBuffer xmlOutputBuffer;
+typedef xmlOutputBuffer *xmlOutputBufferPtr;
+
+/* parser.h */
+typedef struct _xmlParserInput xmlParserInput;
+typedef xmlParserInput *xmlParserInputPtr;
+
+typedef struct _xmlParserCtxt xmlParserCtxt;
+typedef xmlParserCtxt *xmlParserCtxtPtr;
+
+typedef struct _xmlSAXLocator xmlSAXLocator;
+typedef xmlSAXLocator *xmlSAXLocatorPtr;
+
+typedef struct _xmlSAXHandler xmlSAXHandler;
+typedef xmlSAXHandler *xmlSAXHandlerPtr;
+
+/* entities.h */
+typedef struct _xmlEntity xmlEntity;
+typedef xmlEntity *xmlEntityPtr;
+
+/**
+ * BASE_BUFFER_SIZE:
+ *
+ * default buffer size 4000.
+ */
+#define BASE_BUFFER_SIZE 4000
+
+/**
+ * XML_XML_NAMESPACE:
+ *
+ * This is the namespace for the special xml: prefix predefined in the
+ * XML Namespace specification.
+ */
+#define XML_XML_NAMESPACE \
+    (const xmlChar *) "http://www.w3.org/XML/1998/namespace"
+
+/*
+ * The different element types carried by an XML tree.
+ *
+ * NOTE: This is synchronized with DOM Level1 values
+ * See http://www.w3.org/TR/REC-DOM-Level-1/
+ *
+ * Actually this had diverged a bit, and now XML_DOCUMENT_TYPE_NODE should
+ * be deprecated to use an XML_DTD_NODE.
+ */
+typedef enum {
+    XML_ELEMENT_NODE= 1,
+    XML_ATTRIBUTE_NODE= 2,
+    XML_TEXT_NODE= 3,
+    XML_CDATA_SECTION_NODE= 4,
+    XML_ENTITY_REF_NODE= 5,
+    XML_ENTITY_NODE= 6,
+    XML_PI_NODE= 7,
+    XML_COMMENT_NODE= 8,
+    XML_DOCUMENT_NODE= 9,
+    XML_DOCUMENT_TYPE_NODE= 10,
+    XML_DOCUMENT_FRAG_NODE= 11,
+    XML_NOTATION_NODE= 12,
+    XML_HTML_DOCUMENT_NODE= 13,
+    XML_DTD_NODE= 14,
+    XML_ELEMENT_DECL= 15,
+    XML_ATTRIBUTE_DECL= 16,
+    XML_ENTITY_DECL= 17,
+    XML_NAMESPACE_DECL= 18,
+    XML_XINCLUDE_START= 19,
+    XML_XINCLUDE_END= 20
+#ifdef LIBXML_DOCB_ENABLED
+    ,XML_DOCB_DOCUMENT_NODE= 21 /* leading comma keeps the list valid when this entry is compiled out */
+#endif
+} xmlElementType;
+
+/**
+ * xmlChar:
+ *
+ * This is a basic byte in a UTF-8 encoded string.
+ * It's unsigned, allowing one to pinpoint cases where char * are assigned
+ * to xmlChar * (possibly making serialization back impossible).
+ */
+
+typedef unsigned char xmlChar;
+
+/**
+ * BAD_CAST:
+ *
+ * Macro to cast a string to an xmlChar * when one knows it's safe.
+ */
+#define BAD_CAST (xmlChar *)
+
+/**
+ * xmlNotation:
+ *
+ * A DTD Notation definition.
+ */
+
+typedef struct _xmlNotation xmlNotation;
+typedef xmlNotation *xmlNotationPtr;
+struct _xmlNotation {
+    const xmlChar *name; /* Notation name */
+    const xmlChar *PublicID; /* Public identifier, if any */
+    const xmlChar *SystemID; /* System identifier, if any */
+};
+
+/**
+ * xmlAttributeType:
+ *
+ * A DTD Attribute type definition.
+ */
+
+typedef enum {
+    XML_ATTRIBUTE_CDATA = 1,
+    XML_ATTRIBUTE_ID,
+    XML_ATTRIBUTE_IDREF ,
+    XML_ATTRIBUTE_IDREFS,
+    XML_ATTRIBUTE_ENTITY,
+    XML_ATTRIBUTE_ENTITIES,
+    XML_ATTRIBUTE_NMTOKEN,
+    XML_ATTRIBUTE_NMTOKENS,
+    XML_ATTRIBUTE_ENUMERATION,
+    XML_ATTRIBUTE_NOTATION
+} xmlAttributeType;
+
+/**
+ * xmlAttributeDefault:
+ *
+ * A DTD Attribute default definition.
+ */
+
+typedef enum {
+    XML_ATTRIBUTE_NONE = 1,
+    XML_ATTRIBUTE_REQUIRED,
+    XML_ATTRIBUTE_IMPLIED,
+    XML_ATTRIBUTE_FIXED
+} xmlAttributeDefault;
+
+/**
+ * xmlEnumeration:
+ *
+ * List structure used when there is an enumeration in DTDs.
+ */
+
+typedef struct _xmlEnumeration xmlEnumeration;
+typedef xmlEnumeration *xmlEnumerationPtr;
+struct _xmlEnumeration {
+    struct _xmlEnumeration *next; /* next entry in the list */
+    const xmlChar *name; /* Enumeration name */
+};
+
+/**
+ * xmlAttribute:
+ *
+ * An Attribute declaration in a DTD. The fields from _private to doc use the same names and order as struct _xmlNode.
+ */
+
+typedef struct _xmlAttribute xmlAttribute;
+typedef xmlAttribute *xmlAttributePtr;
+struct _xmlAttribute {
+    void *_private; /* application data */
+    xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */
+    const xmlChar *name; /* Attribute name */
+    struct _xmlNode *children; /* NULL */
+    struct _xmlNode *last; /* NULL */
+    struct _xmlDtd *parent; /* -> DTD */
+    struct _xmlNode *next; /* next sibling link */
+    struct _xmlNode *prev; /* previous sibling link */
+    struct _xmlDoc *doc; /* the containing document */
+
+    struct _xmlAttribute *nexth; /* next in hash table */
+    xmlAttributeType atype; /* The attribute type */
+    xmlAttributeDefault def; /* the kind of default (an xmlAttributeDefault value) */
+    const xmlChar *defaultValue; /* or the default value */
+    xmlEnumerationPtr tree; /* or the enumeration tree if any */
+    const xmlChar *prefix; /* the namespace prefix if any */
+    const xmlChar *elem; /* Element holding the attribute */
+};
+
+/**
+ * xmlElementContentType:
+ *
+ * Possible definitions of element content types.
+ */
+typedef enum {
+    XML_ELEMENT_CONTENT_PCDATA = 1,
+    XML_ELEMENT_CONTENT_ELEMENT,
+    XML_ELEMENT_CONTENT_SEQ,
+    XML_ELEMENT_CONTENT_OR
+} xmlElementContentType;
+
+/**
+ * xmlElementContentOccur:
+ *
+ * Possible definitions of element content occurrences.
+ */
+typedef enum {
+    XML_ELEMENT_CONTENT_ONCE = 1,
+    XML_ELEMENT_CONTENT_OPT,
+    XML_ELEMENT_CONTENT_MULT,
+    XML_ELEMENT_CONTENT_PLUS
+} xmlElementContentOccur;
+
+/**
+ * xmlElementContent:
+ *
+ * An XML Element content as stored after parsing an element definition
+ * in a DTD.
+ */
+
+typedef struct _xmlElementContent xmlElementContent;
+typedef xmlElementContent *xmlElementContentPtr;
+struct _xmlElementContent {
+    xmlElementContentType type; /* PCDATA, ELEMENT, SEQ or OR */
+    xmlElementContentOccur ocur; /* ONCE, OPT, MULT or PLUS */
+    const xmlChar *name; /* Element name */
+    struct _xmlElementContent *c1; /* first child */
+    struct _xmlElementContent *c2; /* second child */
+    struct _xmlElementContent *parent; /* parent content node */
+    const xmlChar *prefix; /* Namespace prefix */
+};
+
+/**
+ * xmlElementTypeVal:
+ *
+ * The different possibilities for an element content type.
+ */
+
+typedef enum {
+    XML_ELEMENT_TYPE_UNDEFINED = 0,
+    XML_ELEMENT_TYPE_EMPTY = 1,
+    XML_ELEMENT_TYPE_ANY,
+    XML_ELEMENT_TYPE_MIXED,
+    XML_ELEMENT_TYPE_ELEMENT
+} xmlElementTypeVal;
+
+
+#ifdef __cplusplus
+}
+#endif
+#include <libxml/xmlregexp.h> /* included with the extern "C" block closed above and reopened below */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * xmlElement:
+ *
+ * An XML Element declaration from a DTD.
+ */
+
+typedef struct _xmlElement xmlElement;
+typedef xmlElement *xmlElementPtr;
+struct _xmlElement {
+    void *_private; /* application data */
+    xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */
+    const xmlChar *name; /* Element name */
+    struct _xmlNode *children; /* NULL */
+    struct _xmlNode *last; /* NULL */
+    struct _xmlDtd *parent; /* -> DTD */
+    struct _xmlNode *next; /* next sibling link */
+    struct _xmlNode *prev; /* previous sibling link */
+    struct _xmlDoc *doc; /* the containing document */
+
+    xmlElementTypeVal etype; /* The content type (an xmlElementTypeVal value) */
+    xmlElementContentPtr content; /* the allowed element content */
+    xmlAttributePtr attributes; /* List of the declared attributes */
+    const xmlChar *prefix; /* the namespace prefix if any */
+#ifdef LIBXML_REGEXP_ENABLED
+    xmlRegexpPtr contModel; /* the validating regexp */
+#else
+    void *contModel; /* untyped stand-in so the field exists when LIBXML_REGEXP_ENABLED is not defined */
+#endif
+};
+
+
+/**
+ * XML_LOCAL_NAMESPACE:
+ *
+ * A namespace declaration node.
+ */
+#define XML_LOCAL_NAMESPACE XML_NAMESPACE_DECL
+typedef xmlElementType xmlNsType;
+
+/**
+ * xmlNs:
+ *
+ * An XML namespace.
+ * Note that prefix == NULL is valid, it defines the default namespace
+ * within the subtree (until overridden).
+ *
+ * xmlNsType is unified with xmlElementType.
+ */
+
+typedef struct _xmlNs xmlNs;
+typedef xmlNs *xmlNsPtr;
+struct _xmlNs {
+    struct _xmlNs *next; /* next Ns link for this node */
+    xmlNsType type; /* global or local */
+    const xmlChar *href; /* URL for the namespace */
+    const xmlChar *prefix; /* prefix for the namespace */
+    void *_private; /* application data */
+};
+
+/**
+ * xmlDtd:
+ *
+ * An XML DTD, as defined by <!DOCTYPE ... There is actually one for
+ * the internal subset and one for the external subset.
+ */
+typedef struct _xmlDtd xmlDtd;
+typedef xmlDtd *xmlDtdPtr;
+struct _xmlDtd {
+    void *_private; /* application data */
+    xmlElementType type; /* XML_DTD_NODE, must be second ! */
+    const xmlChar *name; /* Name of the DTD */
+    struct _xmlNode *children; /* the value of the property link - NOTE(review): wording matches xmlAttr; presumably the first child link, confirm */
+    struct _xmlNode *last; /* last child link */
+    struct _xmlDoc *parent; /* child->parent link */
+    struct _xmlNode *next; /* next sibling link */
+    struct _xmlNode *prev; /* previous sibling link */
+    struct _xmlDoc *doc; /* the containing document */
+
+    /* End of common part */
+    void *notations; /* Hash table for notations if any */
+    void *elements; /* Hash table for elements if any */
+    void *attributes; /* Hash table for attributes if any */
+    void *entities; /* Hash table for entities if any */
+    const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
+    const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
+    void *pentities; /* Hash table for param entities if any */
+};
+
+/**
+ * xmlAttr:
+ *
+ * An attribute on an XML node.
+ */
+typedef struct _xmlAttr xmlAttr;
+typedef xmlAttr *xmlAttrPtr;
+struct _xmlAttr {
+    void *_private; /* application data */
+    xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */
+    const xmlChar *name; /* the name of the property */
+    struct _xmlNode *children; /* the value of the property */
+    struct _xmlNode *last; /* NULL */
+    struct _xmlNode *parent; /* child->parent link */
+    struct _xmlAttr *next; /* next sibling link */
+    struct _xmlAttr *prev; /* previous sibling link */
+    struct _xmlDoc *doc; /* the containing document */
+    xmlNs *ns; /* pointer to the associated namespace */
+    xmlAttributeType atype; /* the attribute type if validating */
+};
+
+/**
+ * xmlID:
+ *
+ * An XML ID instance.
+ */
+
+typedef struct _xmlID xmlID;
+typedef xmlID *xmlIDPtr;
+struct _xmlID {
+    struct _xmlID *next; /* next ID */
+    const xmlChar *value; /* The ID name */
+    xmlAttrPtr attr; /* The attribute holding it */
+};
+
+/**
+ * xmlRef:
+ *
+ * An XML IDREF instance.
+ */
+
+typedef struct _xmlRef xmlRef;
+typedef xmlRef *xmlRefPtr;
+struct _xmlRef {
+    struct _xmlRef *next; /* next Ref */
+    const xmlChar *value; /* The Ref name */
+    xmlAttrPtr attr; /* The attribute holding it */
+};
+
+/**
+ * xmlBufferAllocationScheme:
+ *
+ * A buffer allocation scheme can be defined to either match exactly the
+ * need or double its allocated size each time it is found too small.
+ */
+
+typedef enum {
+    XML_BUFFER_ALLOC_DOUBLEIT,
+    XML_BUFFER_ALLOC_EXACT
+} xmlBufferAllocationScheme;
+
+/**
+ * xmlBuffer:
+ *
+ * A buffer structure.
+ */
+typedef struct _xmlBuffer xmlBuffer;
+typedef xmlBuffer *xmlBufferPtr;
+struct _xmlBuffer {
+    xmlChar *content; /* The buffer content UTF8 */
+    unsigned int use; /* The buffer size used */
+    unsigned int size; /* The buffer size */
+    xmlBufferAllocationScheme alloc; /* The realloc method */
+};
+
+/**
+ * xmlNode:
+ *
+ * A node in an XML tree.
+ */ +typedef struct _xmlNode xmlNode; +typedef xmlNode *xmlNodePtr; +struct _xmlNode { + void *_private; /* application data */ + xmlElementType type; /* type number, must be second ! */ + const xmlChar *name; /* the name of the node, or the entity */ + struct _xmlNode *children; /* parent->childs link */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + /* End of common part */ + xmlNs *ns; /* pointer to the associated namespace */ + xmlChar *content; /* the content */ + struct _xmlAttr *properties;/* properties list */ + xmlNs *nsDef; /* namespace definitions on this node */ +}; + +/** + * XML_GET_CONTENT: + * + * Macro to extract the content pointer of a node. + */ +#define XML_GET_CONTENT(n) \ + ((n)->type == XML_ELEMENT_NODE ? NULL : (n)->content) + +/** + * XML_GET_LINE: + * + * Macro to extract the line number of an element node. + * This will work only if line numbering is activated by + * calling xmlLineNumbersDefault(1) before parsing. + */ +#define XML_GET_LINE(n) \ + ((n)->type == XML_ELEMENT_NODE ? (int) (n)->content : 0) + +/** + * xmlDoc: + * + * An XML document. + */ +typedef struct _xmlDoc xmlDoc; +typedef xmlDoc *xmlDocPtr; +struct _xmlDoc { + void *_private; /* application data */ + xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! 
*/ + char *name; /* name/filename/URI of the document */ + struct _xmlNode *children; /* the document tree */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* autoreference to itself */ + + /* End of common part */ + int compression;/* level of zlib compression */ + int standalone; /* standalone document (no external refs) */ + struct _xmlDtd *intSubset; /* the document internal subset */ + struct _xmlDtd *extSubset; /* the document external subset */ + struct _xmlNs *oldNs; /* Global namespace, the old way */ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* external initial encoding, if any */ + void *ids; /* Hash table for ID attributes if any */ + void *refs; /* Hash table for IDREFs attributes if any */ + const xmlChar *URL; /* The URI for that document */ + int charset; /* encoding of the in-memory content + actually an xmlCharEncoding */ +}; + +/** + * xmlChildrenNode: + * + * Macro for compatibility naming layer with libxml1. + */ +#ifndef xmlChildrenNode +#define xmlChildrenNode children +#endif + +/** + * xmlRootNode: + * + * Macro for compatibility naming layer with libxml1. + */ +#ifndef xmlRootNode +#define xmlRootNode children +#endif + +/* + * Variables. + */ +#if 0 +LIBXML_DLL_IMPORT extern int oldXMLWDcompatibility;/* maintain compatibility with old WD */ +LIBXML_DLL_IMPORT extern int xmlIndentTreeOutput; /* try to indent the tree dumps */ +LIBXML_DLL_IMPORT extern xmlBufferAllocationScheme xmlBufferAllocScheme; /* alloc scheme to use */ +LIBXML_DLL_IMPORT extern int xmlSaveNoEmptyTags; /* save empty tags as <empty></empty> */ +LIBXML_DLL_IMPORT extern int xmlDefaultBufferSize; /* default buffer size */ +#endif + +/* + * Handling Buffers. 
+ */ + +void xmlSetBufferAllocationScheme(xmlBufferAllocationScheme scheme); +xmlBufferAllocationScheme xmlGetBufferAllocationScheme(void); + +xmlBufferPtr xmlBufferCreate (void); +xmlBufferPtr xmlBufferCreateSize (size_t size); +int xmlBufferResize (xmlBufferPtr buf, + unsigned int size); +void xmlBufferFree (xmlBufferPtr buf); +int xmlBufferDump (FILE *file, + xmlBufferPtr buf); +void xmlBufferAdd (xmlBufferPtr buf, + const xmlChar *str, + int len); +void xmlBufferAddHead (xmlBufferPtr buf, + const xmlChar *str, + int len); +void xmlBufferCat (xmlBufferPtr buf, + const xmlChar *str); +void xmlBufferCCat (xmlBufferPtr buf, + const char *str); +int xmlBufferShrink (xmlBufferPtr buf, + unsigned int len); +int xmlBufferGrow (xmlBufferPtr buf, + unsigned int len); +void xmlBufferEmpty (xmlBufferPtr buf); +const xmlChar* xmlBufferContent (const xmlBufferPtr buf); +void xmlBufferSetAllocationScheme(xmlBufferPtr buf, + xmlBufferAllocationScheme scheme); +int xmlBufferLength (const xmlBufferPtr buf); + +/* + * Creating/freeing new structures. 
+ */ +xmlDtdPtr xmlCreateIntSubset (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlNewDtd (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc); +void xmlFreeDtd (xmlDtdPtr cur); +xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc, + const xmlChar *href, + const xmlChar *prefix); +xmlNsPtr xmlNewNs (xmlNodePtr node, + const xmlChar *href, + const xmlChar *prefix); +void xmlFreeNs (xmlNsPtr cur); +void xmlFreeNsList (xmlNsPtr cur); +xmlDocPtr xmlNewDoc (const xmlChar *version); +void xmlFreeDoc (xmlDocPtr cur); +xmlAttrPtr xmlNewDocProp (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewNsPropEatName (xmlNodePtr node, + xmlNsPtr ns, + xmlChar *name, + const xmlChar *value); +void xmlFreePropList (xmlAttrPtr cur); +void xmlFreeProp (xmlAttrPtr cur); +xmlAttrPtr xmlCopyProp (xmlNodePtr target, + xmlAttrPtr cur); +xmlAttrPtr xmlCopyPropList (xmlNodePtr target, + xmlAttrPtr cur); +xmlDtdPtr xmlCopyDtd (xmlDtdPtr dtd); +xmlDocPtr xmlCopyDoc (xmlDocPtr doc, + int recursive); + +/* + * Creating new nodes. 
+ */ +xmlNodePtr xmlNewDocNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocNodeEatName (xmlDocPtr doc, + xmlNsPtr ns, + xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocRawNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewNode (xmlNsPtr ns, + const xmlChar *name); +xmlNodePtr xmlNewNodeEatName (xmlNsPtr ns, + xmlChar *name); +xmlNodePtr xmlNewChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewTextChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocText (xmlDocPtr doc, + const xmlChar *content); +xmlNodePtr xmlNewText (const xmlChar *content); +xmlNodePtr xmlNewPI (const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocTextLen (xmlDocPtr doc, + const xmlChar *content, + int len); +xmlNodePtr xmlNewTextLen (const xmlChar *content, + int len); +xmlNodePtr xmlNewDocComment (xmlDocPtr doc, + const xmlChar *content); +xmlNodePtr xmlNewComment (const xmlChar *content); +xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc, + const xmlChar *content, + int len); +xmlNodePtr xmlNewCharRef (xmlDocPtr doc, + const xmlChar *name); +xmlNodePtr xmlNewReference (xmlDocPtr doc, + const xmlChar *name); +xmlNodePtr xmlCopyNode (const xmlNodePtr node, + int recursive); +xmlNodePtr xmlDocCopyNode (const xmlNodePtr node, + xmlDocPtr doc, + int recursive); +xmlNodePtr xmlCopyNodeList (const xmlNodePtr node); +xmlNodePtr xmlNewDocFragment (xmlDocPtr doc); + +/* + * Navigating. + */ +long xmlGetLineNo (xmlNodePtr node); +xmlChar * xmlGetNodePath (xmlNodePtr node); +xmlNodePtr xmlDocGetRootElement (xmlDocPtr doc); +xmlNodePtr xmlGetLastChild (xmlNodePtr parent); +int xmlNodeIsText (xmlNodePtr node); +int xmlIsBlankNode (xmlNodePtr node); + +/* + * Changing the structure. 
+ */ +xmlNodePtr xmlDocSetRootElement (xmlDocPtr doc, + xmlNodePtr root); +void xmlNodeSetName (xmlNodePtr cur, + const xmlChar *name); +xmlNodePtr xmlAddChild (xmlNodePtr parent, + xmlNodePtr cur); +xmlNodePtr xmlAddChildList (xmlNodePtr parent, + xmlNodePtr cur); +xmlNodePtr xmlReplaceNode (xmlNodePtr old, + xmlNodePtr cur); +xmlNodePtr xmlAddSibling (xmlNodePtr cur, + xmlNodePtr elem); +xmlNodePtr xmlAddPrevSibling (xmlNodePtr cur, + xmlNodePtr elem); +xmlNodePtr xmlAddNextSibling (xmlNodePtr cur, + xmlNodePtr elem); +void xmlUnlinkNode (xmlNodePtr cur); +xmlNodePtr xmlTextMerge (xmlNodePtr first, + xmlNodePtr second); +void xmlTextConcat (xmlNodePtr node, + const xmlChar *content, + int len); +void xmlFreeNodeList (xmlNodePtr cur); +void xmlFreeNode (xmlNodePtr cur); +void xmlSetTreeDoc (xmlNodePtr tree, + xmlDocPtr doc); +void xmlSetListDoc (xmlNodePtr list, + xmlDocPtr doc); + +/* + * Namespaces. + */ +xmlNsPtr xmlSearchNs (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *nameSpace); +xmlNsPtr xmlSearchNsByHref (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *href); +xmlNsPtr * xmlGetNsList (xmlDocPtr doc, + xmlNodePtr node); +void xmlSetNs (xmlNodePtr node, + xmlNsPtr ns); +xmlNsPtr xmlCopyNamespace (xmlNsPtr cur); +xmlNsPtr xmlCopyNamespaceList (xmlNsPtr cur); + +/* + * Changing the content. 
+ */ +xmlAttrPtr xmlSetProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlGetProp (xmlNodePtr node, + const xmlChar *name); +xmlAttrPtr xmlHasProp (xmlNodePtr node, + const xmlChar *name); +xmlAttrPtr xmlHasNsProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *nameSpace); +xmlAttrPtr xmlSetNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlGetNsProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *nameSpace); +xmlNodePtr xmlStringGetNodeList (xmlDocPtr doc, + const xmlChar *value); +xmlNodePtr xmlStringLenGetNodeList (xmlDocPtr doc, + const xmlChar *value, + int len); +xmlChar * xmlNodeListGetString (xmlDocPtr doc, + xmlNodePtr list, + int inLine); +xmlChar * xmlNodeListGetRawString (xmlDocPtr doc, + xmlNodePtr list, + int inLine); +void xmlNodeSetContent (xmlNodePtr cur, + const xmlChar *content); +void xmlNodeSetContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +void xmlNodeAddContent (xmlNodePtr cur, + const xmlChar *content); +void xmlNodeAddContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +xmlChar * xmlNodeGetContent (xmlNodePtr cur); +xmlChar * xmlNodeGetLang (xmlNodePtr cur); +void xmlNodeSetLang (xmlNodePtr cur, + const xmlChar *lang); +int xmlNodeGetSpacePreserve (xmlNodePtr cur); +void xmlNodeSetSpacePreserve (xmlNodePtr cur, + int val); +xmlChar * xmlNodeGetBase (xmlDocPtr doc, + xmlNodePtr cur); +void xmlNodeSetBase (xmlNodePtr cur, + xmlChar *uri); + +/* + * Removing content. + */ +int xmlRemoveProp (xmlAttrPtr cur); +int xmlUnsetProp (xmlNodePtr node, + const xmlChar *name); +int xmlUnsetNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name); + +/* + * Internal, don't use. 
+ */ +void xmlBufferWriteCHAR (xmlBufferPtr buf, + const xmlChar *string); +void xmlBufferWriteChar (xmlBufferPtr buf, + const char *string); +void xmlBufferWriteQuotedString(xmlBufferPtr buf, + const xmlChar *string); + +/* + * Namespace handling. + */ +int xmlReconciliateNs (xmlDocPtr doc, + xmlNodePtr tree); + +/* + * Saving. + */ +void xmlDocDumpFormatMemory (xmlDocPtr cur, + xmlChar **mem, + int *size, + int format); +void xmlDocDumpMemory (xmlDocPtr cur, + xmlChar **mem, + int *size); +void xmlDocDumpMemoryEnc (xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding); +void xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding, + int format); +int xmlDocFormatDump(FILE *f, + xmlDocPtr cur, + int format); +int xmlDocDump (FILE *f, + xmlDocPtr cur); +void xmlElemDump (FILE *f, + xmlDocPtr doc, + xmlNodePtr cur); +int xmlSaveFile (const char *filename, + xmlDocPtr cur); +int xmlSaveFormatFile (const char *filename, + xmlDocPtr cur, + int format); +int xmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format); + +int xmlSaveFileTo (xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); +int xmlSaveFormatFileTo (xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding, + int format); +void xmlNodeDumpOutput (xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format, + const char *encoding); + +int xmlSaveFormatFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding, + int format); + +int xmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); + +/* + * XHTML + */ +int xmlIsXHTML (const xmlChar *systemID, + const xmlChar *publicID); + +/* + * Compression. 
+ */
+int xmlGetDocCompressMode (xmlDocPtr doc);
+void xmlSetDocCompressMode (xmlDocPtr doc,
+        int mode);
+int xmlGetCompressMode (void);
+void xmlSetCompressMode (int mode);
+
+#ifdef __cplusplus
+}
+#endif
+#ifndef __XML_PARSER_H__ /* presumably parser.h's include guard - confirm */
+#include <libxml/xmlmemory.h>
+#endif
+
+#endif /* __XML_TREE_H__ */
+
diff --git a/bundle/libxml/include/libxml/uri.h b/bundle/libxml/include/libxml/uri.h
new file mode 100644
index 0000000000..8ca7dad981
--- /dev/null
+++ b/bundle/libxml/include/libxml/uri.h
@@ -0,0 +1,67 @@
+/**
+ * uri.c: library of generic URI related routines
+ *
+ * Reference: RFC 2396
+ *
+ * See Copyright for the status of this software.
+ *
+ * daniel@veillard.com
+ */
+
+#ifndef __XML_URI_H__
+#define __XML_URI_H__
+
+#include <libxml/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * xmlURI:
+ *
+ * A parsed URI reference. This is a struct containing the various fields
+ * as described in RFC 2396 but separated for further processing.
+ */
+typedef struct _xmlURI xmlURI;
+typedef xmlURI *xmlURIPtr;
+struct _xmlURI {
+    char *scheme; /* the URI scheme */
+    char *opaque; /* opaque part */
+    char *authority; /* the authority part */
+    char *server; /* the server part */
+    char *user; /* the user part */
+    int port; /* the port number */
+    char *path; /* the path string */
+    char *query; /* the query string */
+    char *fragment; /* the fragment identifier */
+    int cleanup; /* parsing potentially unclean URI */
+};
+
+/*
+ * This function is in tree.h:
+ * xmlChar * xmlNodeGetBase (xmlDocPtr doc,
+ * xmlNodePtr cur);
+ */
+xmlURIPtr xmlCreateURI (void);
+xmlChar * xmlBuildURI (const xmlChar *URI,
+        const xmlChar *base);
+xmlURIPtr xmlParseURI (const char *str);
+int xmlParseURIReference (xmlURIPtr uri,
+        const char *str);
+xmlChar * xmlSaveUri (xmlURIPtr uri);
+void xmlPrintURI (FILE *stream,
+        xmlURIPtr uri);
+xmlChar * xmlURIEscapeStr (const xmlChar *str,
+        const xmlChar *list);
+char * xmlURIUnescapeString (const char *str,
+        int len,
+        char *target);
+int xmlNormalizeURIPath (char *path);
+xmlChar * xmlURIEscape (const xmlChar *str);
+void xmlFreeURI (xmlURIPtr uri);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_URI_H__ */
diff --git a/bundle/libxml/include/libxml/valid.h b/bundle/libxml/include/libxml/valid.h
new file mode 100644
index 0000000000..4cc5f9788d
--- /dev/null
+++ b/bundle/libxml/include/libxml/valid.h
@@ -0,0 +1,319 @@
+/*
+ * valid.h : interface to the DTD handling and the validity checking
+ *
+ * See Copyright for the status of this software.
+ *
+ * daniel@veillard.com
+ */
+
+
+#ifndef __XML_VALID_H__
+#define __XML_VALID_H__
+
+#include <libxml/tree.h>
+#include <libxml/list.h>
+#include <libxml/xmlautomata.h>
+#include <libxml/xmlregexp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Validation state added for non-deterministic content model.
+ */
+typedef struct _xmlValidState xmlValidState;
+typedef xmlValidState *xmlValidStatePtr;
+
+/**
+ * xmlValidityErrorFunc:
+ * @ctx: an xmlValidCtxtPtr validity error context
+ * @msg: the string to format *printf like vararg
+ * @...: remaining arguments to the format
+ *
+ * Callback called when a validity error is found. This is a message
+ * oriented function similar to an *printf function.
+ */
+typedef void (*xmlValidityErrorFunc) (void *ctx,
+        const char *msg,
+        ...);
+
+/**
+ * xmlValidityWarningFunc:
+ * @ctx: an xmlValidCtxtPtr validity error context
+ * @msg: the string to format *printf like vararg
+ * @...: remaining arguments to the format
+ *
+ * Callback called when a validity warning is found. This is a message
+ * oriented function similar to an *printf function.
+ */
+typedef void (*xmlValidityWarningFunc) (void *ctx,
+        const char *msg,
+        ...);
+
+/**
+ * xmlValidCtxt:
+ * An xmlValidCtxt is used for error reporting when validating.
+ */ +typedef struct _xmlValidCtxt xmlValidCtxt; +typedef xmlValidCtxt *xmlValidCtxtPtr; +struct _xmlValidCtxt { + void *userData; /* user specific data block */ + xmlValidityErrorFunc error; /* the callback in case of errors */ + xmlValidityWarningFunc warning; /* the callback in case of warnings */ + + /* Node analysis stack used when validating within entities */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + int finishDtd; /* finished validating the Dtd ? */ + xmlDocPtr doc; /* the document */ + int valid; /* temporary validity check result */ + + /* state used for non-deterministic content validation */ + xmlValidState *vstate; /* current state */ + int vstateNr; /* Depth of the validation stack */ + int vstateMax; /* Max depth of the validation stack */ + xmlValidState *vstateTab; /* array of validation states */ + +#ifdef LIBXML_REGEXP_ENABLED + xmlAutomataPtr am; /* the automata */ + xmlAutomataStatePtr state; /* used to build the automata */ +#else + void *am; /* untyped stand-in when LIBXML_REGEXP_ENABLED is not defined */ + void *state; /* untyped stand-in when LIBXML_REGEXP_ENABLED is not defined */ +#endif +}; + +/* + * ALL notation declarations are stored in a table. + * There is one table per DTD. + */ + +typedef struct _xmlHashTable xmlNotationTable; +typedef xmlNotationTable *xmlNotationTablePtr; + +/* + * ALL element declarations are stored in a table. + * There is one table per DTD. + */ + +typedef struct _xmlHashTable xmlElementTable; +typedef xmlElementTable *xmlElementTablePtr; + +/* + * ALL attribute declarations are stored in a table. + * There is one table per DTD. + */ + +typedef struct _xmlHashTable xmlAttributeTable; +typedef xmlAttributeTable *xmlAttributeTablePtr; + +/* + * ALL ID attributes are stored in a table. + * There is one table per document. + */ + +typedef struct _xmlHashTable xmlIDTable; +typedef xmlIDTable *xmlIDTablePtr; + +/* + * ALL Refs attributes are stored in a table.
+ * There is one table per document. + */ + +typedef struct _xmlHashTable xmlRefTable; +typedef xmlRefTable *xmlRefTablePtr; + +/* helper */ +xmlChar * xmlSplitQName2 (const xmlChar *name, + xmlChar **prefix); + +/* Notation */ +xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar *PublicID, + const xmlChar *SystemID); +xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table); +void xmlFreeNotationTable(xmlNotationTablePtr table); +void xmlDumpNotationDecl (xmlBufferPtr buf, + xmlNotationPtr nota); +void xmlDumpNotationTable(xmlBufferPtr buf, + xmlNotationTablePtr table); + +/* Element Content */ +xmlElementContentPtr xmlNewElementContent (xmlChar *name, + xmlElementContentType type); +xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content); +void xmlFreeElementContent(xmlElementContentPtr cur); +void xmlSnprintfElementContent(char *buf, + int size, + xmlElementContentPtr content, + int glob); +/* DEPRECATED */ +void xmlSprintfElementContent(char *buf, + xmlElementContentPtr content, + int glob); +/* DEPRECATED */ + +/* Element */ +xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + xmlElementTypeVal type, + xmlElementContentPtr content); +xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table); +void xmlFreeElementTable (xmlElementTablePtr table); +void xmlDumpElementTable (xmlBufferPtr buf, + xmlElementTablePtr table); +void xmlDumpElementDecl (xmlBufferPtr buf, + xmlElementPtr elem); + +/* Enumeration */ +xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name); +void xmlFreeEnumeration (xmlEnumerationPtr cur); +xmlEnumerationPtr xmlCopyEnumeration (xmlEnumerationPtr cur); + +/* Attribute */ +xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name, + const xmlChar *ns, + xmlAttributeType type, + xmlAttributeDefault def, + const xmlChar *defaultValue, + 
xmlEnumerationPtr tree); +xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table); +void xmlFreeAttributeTable (xmlAttributeTablePtr table); +void xmlDumpAttributeTable (xmlBufferPtr buf, + xmlAttributeTablePtr table); +void xmlDumpAttributeDecl (xmlBufferPtr buf, + xmlAttributePtr attr); + +/* IDs */ +xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +void xmlFreeIDTable (xmlIDTablePtr table); +xmlAttrPtr xmlGetID (xmlDocPtr doc, + const xmlChar *ID); +int xmlIsID (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +int xmlRemoveID (xmlDocPtr doc, xmlAttrPtr attr); + +/* IDREFs */ +xmlRefPtr xmlAddRef (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +void xmlFreeRefTable (xmlRefTablePtr table); +int xmlIsRef (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +int xmlRemoveRef (xmlDocPtr doc, xmlAttrPtr attr); +xmlListPtr xmlGetRefs (xmlDocPtr doc, + const xmlChar *ID); + +/** + * The public function calls related to validity checking. 
+ */ + +int xmlValidateRoot (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateElementDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlElementPtr elem); +xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); +int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlAttributePtr attr); +int xmlValidateAttributeValue(xmlAttributeType type, + const xmlChar *value); +int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNotationPtr nota); +int xmlValidateDtd (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlDtdPtr dtd); +int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateDocument (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlValidateOneElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlValidateOneAttribute (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr, + const xmlChar *value); +int xmlValidateOneNamespace (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *prefix, + xmlNsPtr ns, + const xmlChar *value); +int xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateNotationUse (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *notationName); +int xmlIsMixedElement (xmlDocPtr doc, + const xmlChar *name); +xmlAttributePtr xmlGetDtdAttrDesc (xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name); +xmlAttributePtr xmlGetDtdQAttrDesc (xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name, + const xmlChar *prefix); +xmlNotationPtr xmlGetDtdNotationDesc (xmlDtdPtr dtd, + const xmlChar *name); +xmlElementPtr xmlGetDtdQElementDesc (xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar 
*prefix); +xmlElementPtr xmlGetDtdElementDesc (xmlDtdPtr dtd, + const xmlChar *name); + +int xmlValidGetValidElements(xmlNode *prev, + xmlNode *next, + const xmlChar **list, + int max); +int xmlValidGetPotentialChildren(xmlElementContent *ctree, + const xmlChar **list, + int *len, + int max); +int xmlValidateNameValue (const xmlChar *value); +int xmlValidateNamesValue (const xmlChar *value); +int xmlValidateNmtokenValue (const xmlChar *value); +int xmlValidateNmtokensValue(const xmlChar *value); + +#ifdef LIBXML_REGEXP_ENABLED +/* + * Validation based on the regexp support + */ +int xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, + xmlElementPtr elem); + +#endif /* LIBXML_REGEXP_ENABLED */ +#ifdef __cplusplus +} +#endif +#endif /* __XML_VALID_H__ */ diff --git a/bundle/libxml/include/libxml/xinclude.h b/bundle/libxml/include/libxml/xinclude.h new file mode 100644 index 0000000000..9c83ba0e5c --- /dev/null +++ b/bundle/libxml/include/libxml/xinclude.h @@ -0,0 +1,26 @@ +/* + * xinclude.c : API to handle XInclude processing + * + * World Wide Web Consortium Working Draft 26 October 2000 + * http://www.w3.org/TR/2000/WD-xinclude-20001026 + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_XINCLUDE_H__ +#define __XML_XINCLUDE_H__ + +#include <libxml/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +int xmlXIncludeProcess (xmlDocPtr doc); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XINCLUDE_H__ */ diff --git a/bundle/libxml/include/libxml/xlink.h b/bundle/libxml/include/libxml/xlink.h new file mode 100644 index 0000000000..c7f48c8539 --- /dev/null +++ b/bundle/libxml/include/libxml/xlink.h @@ -0,0 +1,180 @@ +/* + * xlink.h : interfaces to the hyperlinks detection module + * + * See Copyright for the status of this software. 
+ * + * Related specification: http://www.w3.org/TR/xlink + * http://www.w3.org/HTML/ + * and XBase + * + * daniel@veillard.com + */ + +#ifndef __XML_XLINK_H__ +#define __XML_XLINK_H__ + +#include <libxml/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif +/** + * Various defines for the various Link properties. + * + * NOTE: the link detection layer will try to resolve QName expansion + * of namespaces. If "foo" is the prefix for "http://foo.com/" + * then the link detection layer will expand role="foo:myrole" + * to "http://foo.com/:myrole". + * NOTE: the link detection layer will expand URI-References found on + * href attributes by using the base mechanism if found. + */ +typedef xmlChar *xlinkHRef; +typedef xmlChar *xlinkRole; +typedef xmlChar *xlinkTitle; + +typedef enum { + XLINK_TYPE_NONE = 0, + XLINK_TYPE_SIMPLE, + XLINK_TYPE_EXTENDED, + XLINK_TYPE_EXTENDED_SET +} xlinkType; + +typedef enum { + XLINK_SHOW_NONE = 0, + XLINK_SHOW_NEW, + XLINK_SHOW_EMBED, + XLINK_SHOW_REPLACE +} xlinkShow; + +typedef enum { + XLINK_ACTUATE_NONE = 0, + XLINK_ACTUATE_AUTO, + XLINK_ACTUATE_ONREQUEST +} xlinkActuate; + +/** + * xlinkNodeDetectFunc: + * @ctx: user data pointer + * @node: the node to check + * + * This is the prototype for the link detection routine. + * It calls the default link detection callbacks upon link detection. + */ +typedef void (*xlinkNodeDetectFunc) (void *ctx, xmlNodePtr node); + +/** + * The link detection module interacts with the upper layers using + * a set of callbacks registered at parsing time. + */ + +/** + * xlinkSimpleLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @href: the target of the link + * @role: the role string + * @title: the link title + * + * This is the prototype for a simple link detection callback.
+ */ +typedef void +(*xlinkSimpleLinkFunk) (void *ctx, + xmlNodePtr node, + const xlinkHRef href, + const xlinkRole role, + const xlinkTitle title); + +/** + * xlinkExtendedLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbArcs: the number of arcs detected on the link + * @from: pointer to the array of source roles found on the arcs + * @to: pointer to the array of target roles found on the arcs + * @show: array of values for the show attributes found on the arcs + * @actuate: array of values for the actuate attributes found on the arcs + * @nbTitles: the number of titles detected on the link + * @titles: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for an extended link detection callback. + */ +typedef void +(*xlinkExtendedLinkFunk)(void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbArcs, + const xlinkRole *from, + const xlinkRole *to, + xlinkShow *show, + xlinkActuate *actuate, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * xlinkExtendedLinkSetFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbTitles: the number of titles detected on the link + * @titles: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for an extended link set detection callback.
+ */ +typedef void +(*xlinkExtendedLinkSetFunk) (void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * This is the structure containing a set of Links detection callbacks. + * + * There are no default xlink callbacks; if one wants to get link + * recognition activated, those callbacks must be provided before parsing. + */ +typedef struct _xlinkHandler xlinkHandler; +typedef xlinkHandler *xlinkHandlerPtr; +struct _xlinkHandler { + xlinkSimpleLinkFunk simple; + xlinkExtendedLinkFunk extended; + xlinkExtendedLinkSetFunk set; +}; + +/* + * The default detection routine, can be overridden, they call the default + * detection callbacks. + */ + +xlinkNodeDetectFunc xlinkGetDefaultDetect (void); +void xlinkSetDefaultDetect (xlinkNodeDetectFunc func); + +/* + * Routines to set/get the default handlers. + */ +xlinkHandlerPtr xlinkGetDefaultHandler (void); +void xlinkSetDefaultHandler (xlinkHandlerPtr handler); + +/* + * Link detection module itself. + */ +xlinkType xlinkIsLink (xmlDocPtr doc, + xmlNodePtr node); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XLINK_H__ */ diff --git a/bundle/libxml/include/libxml/xmlIO.h b/bundle/libxml/include/libxml/xmlIO.h new file mode 100644 index 0000000000..cd4058c88c --- /dev/null +++ b/bundle/libxml/include/libxml/xmlIO.h @@ -0,0 +1,283 @@ +/* + * xmlIO.h : interface for the I/O interfaces used by the parser + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + * + */ + +#ifndef __XML_IO_H__ +#define __XML_IO_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Those are the functions and datatypes for the parser input + * I/O structures.
+ */ + +/** + * xmlInputMatchCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Input API to detect if the current handler + * can provide input functionality for this resource. + * + * Returns 1 if yes and 0 if another Input module should be used + */ +typedef int (*xmlInputMatchCallback) (char const *filename); +/** + * xmlInputOpenCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Input API to open the resource + * + * Returns an Input context or NULL in case of error + */ +typedef void * (*xmlInputOpenCallback) (char const *filename); +/** + * xmlInputReadCallback: + * @context: an Input context + * @buffer: the buffer to store data read + * @len: the length of the buffer in bytes + * + * Callback used in the I/O Input API to read the resource + * + * Returns the number of bytes read or -1 in case of error + */ +typedef int (*xmlInputReadCallback) (void * context, char * buffer, int len); +/** + * xmlInputCloseCallback: + * @context: an Input context + * + * Callback used in the I/O Input API to close the resource + * + * Returns 0 or -1 in case of error + */ +typedef int (*xmlInputCloseCallback) (void * context); + +/* + * Those are the functions and datatypes for the library output + * I/O structures. + */ + +/** + * xmlOutputMatchCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Output API to detect if the current handler + * can provide output functionality for this resource.
+ * + * Returns 1 if yes and 0 if another Output module should be used + */ +typedef int (*xmlOutputMatchCallback) (char const *filename); +/** + * xmlOutputOpenCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Output API to open the resource + * + * Returns an Output context or NULL in case of error + */ +typedef void * (*xmlOutputOpenCallback) (char const *filename); +/** + * xmlOutputWriteCallback: + * @context: an Output context + * @buffer: the buffer of data to write + * @len: the length of the buffer in bytes + * + * Callback used in the I/O Output API to write to the resource + * + * Returns the number of bytes written or -1 in case of error + */ +typedef int (*xmlOutputWriteCallback) (void * context, const char * buffer, + int len); +/** + * xmlOutputCloseCallback: + * @context: an Output context + * + * Callback used in the I/O Output API to close the resource + * + * Returns 0 or -1 in case of error + */ +typedef int (*xmlOutputCloseCallback) (void * context); + +#ifdef __cplusplus +} +#endif + +#include <libxml/globals.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/encoding.h> + +#ifdef __cplusplus +extern "C" { +#endif +struct _xmlParserInputBuffer { + void* context; + xmlInputReadCallback readcallback; + xmlInputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */ + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */ + xmlBufferPtr raw; /* if encoder != NULL buffer for raw input */ +}; + + +struct _xmlOutputBuffer { + void* context; + xmlOutputWriteCallback writecallback; + xmlOutputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */ + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 or ISOLatin */ + xmlBufferPtr conv; /* if encoder != NULL buffer for output */ + int written; /* total number of bytes written */ +}; + +/* + * Interfaces for input + */ +void xmlCleanupInputCallbacks (void);
+void xmlCleanupOutputCallbacks (void); + +void xmlRegisterDefaultInputCallbacks (void); +xmlParserInputBufferPtr + xmlAllocParserInputBuffer (xmlCharEncoding enc); + +xmlParserInputBufferPtr + xmlParserInputBufferCreateFilename (const char *URI, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateFile (FILE *file, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateFd (int fd, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateMem (const char *mem, int size, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); +int xmlParserInputBufferRead (xmlParserInputBufferPtr in, + int len); +int xmlParserInputBufferGrow (xmlParserInputBufferPtr in, + int len); +int xmlParserInputBufferPush (xmlParserInputBufferPtr in, + int len, + const char *buf); +void xmlFreeParserInputBuffer (xmlParserInputBufferPtr in); +char * xmlParserGetDirectory (const char *filename); + +int xmlRegisterInputCallbacks (xmlInputMatchCallback matchFunc, + xmlInputOpenCallback openFunc, + xmlInputReadCallback readFunc, + xmlInputCloseCallback closeFunc); +/* + * Interfaces for output + */ +void xmlRegisterDefaultOutputCallbacks(void); +xmlOutputBufferPtr + xmlAllocOutputBuffer (xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateFilename (const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression); + +xmlOutputBufferPtr + xmlOutputBufferCreateFile (FILE *file, + xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateFd (int fd, + xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateIO (xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, + void *ioctx, + xmlCharEncodingHandlerPtr encoder); + +int xmlOutputBufferWrite (xmlOutputBufferPtr out, + int len, + const char *buf); +int 
xmlOutputBufferWriteString (xmlOutputBufferPtr out, + const char *str); + +int xmlOutputBufferFlush (xmlOutputBufferPtr out); +int xmlOutputBufferClose (xmlOutputBufferPtr out); + +int xmlRegisterOutputCallbacks (xmlOutputMatchCallback matchFunc, + xmlOutputOpenCallback openFunc, + xmlOutputWriteCallback writeFunc, + xmlOutputCloseCallback closeFunc); + +/* This function only exists if HTTP support built into the library */ +#ifdef LIBXML_HTTP_ENABLED +void * xmlIOHTTPOpenW (const char * post_uri, + int compression ); +void xmlRegisterHTTPPostCallbacks (void ); +#endif + +/* + * A predefined entity loader disabling network accesses + */ +xmlParserInputPtr xmlNoNetExternalEntityLoader(const char *URL, + const char *ID, + xmlParserCtxtPtr ctxt); + +xmlChar *xmlNormalizeWindowsPath (const xmlChar *path); + +int xmlCheckFilename (const char *path); +/** + * Default 'file://' protocol callbacks + */ +int xmlFileMatch (const char *filename); +void * xmlFileOpen (const char *filename); +int xmlFileRead (void * context, + char * buffer, + int len); +int xmlFileClose (void * context); + +/** + * Default 'http://' protocol callbacks + */ +#ifdef LIBXML_HTTP_ENABLED +int xmlIOHTTPMatch (const char *filename); +void * xmlIOHTTPOpen (const char *filename); +int xmlIOHTTPRead (void * context, + char * buffer, + int len); +int xmlIOHTTPClose (void * context); +#endif /* LIBXML_HTTP_ENABLED */ + +/** + * Default 'ftp://' protocol callbacks + */ +#ifdef LIBXML_FTP_ENABLED +int xmlIOFTPMatch (const char *filename); +void * xmlIOFTPOpen (const char *filename); +int xmlIOFTPRead (void * context, + char * buffer, + int len); +int xmlIOFTPClose (void * context); +#endif /* LIBXML_FTP_ENABLED */ + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_IO_H__ */ diff --git a/bundle/libxml/include/libxml/xmlautomata.h b/bundle/libxml/include/libxml/xmlautomata.h new file mode 100644 index 0000000000..96ba2245b3 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlautomata.h @@ -0,0 +1,94 @@ +/* 
+ * automata.h : description of the API to build regexp automats + * + * See Copyright for the status of this software. + * + * Daniel Veillard <veillard@redhat.com> + */ + +#ifndef __XML_AUTOMATA_H__ +#define __XML_AUTOMATA_H__ + +#include <libxml/xmlversion.h> +#include <libxml/tree.h> + +#ifdef LIBXML_AUTOMATA_ENABLED +#include <libxml/xmlregexp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlAutomataPtr: + * + * A libxml automata description; it can be compiled into a regexp + */ +typedef struct _xmlAutomata xmlAutomata; +typedef xmlAutomata *xmlAutomataPtr; + +/** + * xmlAutomataStatePtr: + * + * A state in the automata description. + */ +typedef struct _xmlAutomataState xmlAutomataState; +typedef xmlAutomataState *xmlAutomataStatePtr; + +/* + * Building API + */ +xmlAutomataPtr xmlNewAutomata (void); +void xmlFreeAutomata (xmlAutomataPtr am); + +xmlAutomataStatePtr xmlAutomataGetInitState (xmlAutomataPtr am); +int xmlAutomataSetFinalState(xmlAutomataPtr am, + xmlAutomataStatePtr state); +xmlAutomataStatePtr xmlAutomataNewState (xmlAutomataPtr am); +xmlAutomataStatePtr xmlAutomataNewTransition(xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + void *data); +xmlAutomataStatePtr xmlAutomataNewCountTrans(xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + int min, + int max, + void *data); +xmlAutomataStatePtr xmlAutomataNewOnceTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + int min, + int max, + void *data); +xmlAutomataStatePtr xmlAutomataNewAllTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + int lax); +xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to); +xmlAutomataStatePtr xmlAutomataNewCountedTrans(xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + int counter);
+xmlAutomataStatePtr xmlAutomataNewCounterTrans(xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + int counter); +int xmlAutomataNewCounter (xmlAutomataPtr am, + int min, + int max); + +xmlRegexpPtr xmlAutomataCompile (xmlAutomataPtr am); +int xmlAutomataIsDeterminist(xmlAutomataPtr am); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_AUTOMATA_ENABLED */ +#endif /* __XML_AUTOMATA_H__ */ diff --git a/bundle/libxml/include/libxml/xmlerror.h b/bundle/libxml/include/libxml/xmlerror.h new file mode 100644 index 0000000000..33e2275910 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlerror.h @@ -0,0 +1,184 @@ +#include <libxml/parser.h> + +#ifndef __XML_ERROR_H__ +#define __XML_ERROR_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_ERR_OK = 0, + XML_ERR_INTERNAL_ERROR, + XML_ERR_NO_MEMORY, + + XML_ERR_DOCUMENT_START, /* 3 */ + XML_ERR_DOCUMENT_EMPTY, + XML_ERR_DOCUMENT_END, + + XML_ERR_INVALID_HEX_CHARREF, /* 6 */ + XML_ERR_INVALID_DEC_CHARREF, + XML_ERR_INVALID_CHARREF, + XML_ERR_INVALID_CHAR, + + XML_ERR_CHARREF_AT_EOF, /* 10 */ + XML_ERR_CHARREF_IN_PROLOG, + XML_ERR_CHARREF_IN_EPILOG, + XML_ERR_CHARREF_IN_DTD, + XML_ERR_ENTITYREF_AT_EOF, + XML_ERR_ENTITYREF_IN_PROLOG, + XML_ERR_ENTITYREF_IN_EPILOG, + XML_ERR_ENTITYREF_IN_DTD, + XML_ERR_PEREF_AT_EOF, + XML_ERR_PEREF_IN_PROLOG, + XML_ERR_PEREF_IN_EPILOG, + XML_ERR_PEREF_IN_INT_SUBSET, + + XML_ERR_ENTITYREF_NO_NAME, /* 22 */ + XML_ERR_ENTITYREF_SEMICOL_MISSING, + + XML_ERR_PEREF_NO_NAME, /* 24 */ + XML_ERR_PEREF_SEMICOL_MISSING, + + XML_ERR_UNDECLARED_ENTITY, /* 26 */ + XML_WAR_UNDECLARED_ENTITY, + XML_ERR_UNPARSED_ENTITY, + XML_ERR_ENTITY_IS_EXTERNAL, + XML_ERR_ENTITY_IS_PARAMETER, + + XML_ERR_UNKNOWN_ENCODING, /* 31 */ + XML_ERR_UNSUPPORTED_ENCODING, + + XML_ERR_STRING_NOT_STARTED, /* 33 */ + XML_ERR_STRING_NOT_CLOSED, + XML_ERR_NS_DECL_ERROR, + + XML_ERR_ENTITY_NOT_STARTED, /* 36 */ + XML_ERR_ENTITY_NOT_FINISHED, + + XML_ERR_LT_IN_ATTRIBUTE, /* 38 */ +
XML_ERR_ATTRIBUTE_NOT_STARTED, + XML_ERR_ATTRIBUTE_NOT_FINISHED, + XML_ERR_ATTRIBUTE_WITHOUT_VALUE, + XML_ERR_ATTRIBUTE_REDEFINED, + + XML_ERR_LITERAL_NOT_STARTED, /* 43 */ + XML_ERR_LITERAL_NOT_FINISHED, + + XML_ERR_COMMENT_NOT_FINISHED, /* 45 */ + + XML_ERR_PI_NOT_STARTED, /* 46 */ + XML_ERR_PI_NOT_FINISHED, + + XML_ERR_NOTATION_NOT_STARTED, /* 48 */ + XML_ERR_NOTATION_NOT_FINISHED, + + XML_ERR_ATTLIST_NOT_STARTED, /* 50 */ + XML_ERR_ATTLIST_NOT_FINISHED, + + XML_ERR_MIXED_NOT_STARTED, /* 52 */ + XML_ERR_MIXED_NOT_FINISHED, + + XML_ERR_ELEMCONTENT_NOT_STARTED, /* 54 */ + XML_ERR_ELEMCONTENT_NOT_FINISHED, + + XML_ERR_XMLDECL_NOT_STARTED, /* 56 */ + XML_ERR_XMLDECL_NOT_FINISHED, + + XML_ERR_CONDSEC_NOT_STARTED, /* 58 */ + XML_ERR_CONDSEC_NOT_FINISHED, + + XML_ERR_EXT_SUBSET_NOT_FINISHED, /* 60 */ + + XML_ERR_DOCTYPE_NOT_FINISHED, /* 61 */ + + XML_ERR_MISPLACED_CDATA_END, /* 62 */ + XML_ERR_CDATA_NOT_FINISHED, + + XML_ERR_RESERVED_XML_NAME, /* 64 */ + + XML_ERR_SPACE_REQUIRED, /* 65 */ + XML_ERR_SEPARATOR_REQUIRED, + XML_ERR_NMTOKEN_REQUIRED, + XML_ERR_NAME_REQUIRED, + XML_ERR_PCDATA_REQUIRED, + XML_ERR_URI_REQUIRED, + XML_ERR_PUBID_REQUIRED, + XML_ERR_LT_REQUIRED, + XML_ERR_GT_REQUIRED, + XML_ERR_LTSLASH_REQUIRED, + XML_ERR_EQUAL_REQUIRED, + + XML_ERR_TAG_NAME_MISMATCH, /* 76 */ + XML_ERR_TAG_NOT_FINISHED, + + XML_ERR_STANDALONE_VALUE, /* 78 */ + + XML_ERR_ENCODING_NAME, /* 79 */ + + XML_ERR_HYPHEN_IN_COMMENT, /* 80 */ + + XML_ERR_INVALID_ENCODING, /* 81 */ + + XML_ERR_EXT_ENTITY_STANDALONE, /* 82 */ + + XML_ERR_CONDSEC_INVALID, /* 83 */ + + XML_ERR_VALUE_REQUIRED, /* 84 */ + + XML_ERR_NOT_WELL_BALANCED, /* 85 */ + XML_ERR_EXTRA_CONTENT, /* 86 */ + XML_ERR_ENTITY_CHAR_ERROR, /* 87 */ + XML_ERR_ENTITY_PE_INTERNAL, /* 88 */ + XML_ERR_ENTITY_LOOP, /* 89 */ + XML_ERR_ENTITY_BOUNDARY, /* 90 */ + XML_ERR_INVALID_URI, /* 91 */ + XML_ERR_URI_FRAGMENT, /* 92 */ + XML_WAR_CATALOG_PI, /* 93 */ + XML_ERR_NO_DTD /* 94 */ +}xmlParserErrors; + +/** + * xmlGenericErrorFunc: + *
@ctx: a parsing context + * @msg: the message + * @...: the extra arguments of the varargs to format the message + * + * Signature of the function to use when there is an error and + * no parsing or validity context available. + */ +typedef void (*xmlGenericErrorFunc) (void *ctx, + const char *msg, + ...); + +/* + * Use the following function to reset the two global variables + * xmlGenericError and xmlGenericErrorContext. + */ +void xmlSetGenericErrorFunc (void *ctx, + xmlGenericErrorFunc handler); +void initGenericErrorDefaultFunc(xmlGenericErrorFunc *handler); + +/* + * Default message routines used by SAX and Valid context for error + * and warning reporting. + */ +void xmlParserError (void *ctx, + const char *msg, + ...); +void xmlParserWarning (void *ctx, + const char *msg, + ...); +void xmlParserValidityError (void *ctx, + const char *msg, + ...); +void xmlParserValidityWarning(void *ctx, + const char *msg, + ...); +void xmlParserPrintFileInfo (xmlParserInputPtr input); +void xmlParserPrintFileContext(xmlParserInputPtr input); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_ERROR_H__ */ diff --git a/bundle/libxml/include/libxml/xmlmemory.h b/bundle/libxml/include/libxml/xmlmemory.h new file mode 100644 index 0000000000..8e8df944c1 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlmemory.h @@ -0,0 +1,169 @@ +/* + * xmlmemory.h: interface for the memory allocation debug. + * + * daniel@veillard.com + */ + + +#ifndef _DEBUG_MEMORY_ALLOC_ +#define _DEBUG_MEMORY_ALLOC_ + +#include <stdio.h> +#include <libxml/xmlversion.h> + +/** + * DEBUG_MEMORY: + * + * DEBUG_MEMORY replaces the allocator with a collect and debug + * shell to the libc allocator. + * DEBUG_MEMORY should only be activated when debugging + * libxml i.e. if libxml has been configured with --with-debug-mem too.
+ */ +/* #define DEBUG_MEMORY_FREED */ +/* #define DEBUG_MEMORY_LOCATION */ + +#ifdef DEBUG +#ifndef DEBUG_MEMORY +#define DEBUG_MEMORY +#endif +#endif + +/** + * DEBUG_MEMORY_LOCATION: + * + * DEBUG_MEMORY_LOCATION should be activated only when debugging + * libxml i.e. if libxml has been configured with --with-debug-mem too. + */ +#ifdef DEBUG_MEMORY_LOCATION +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The XML memory wrapper support 4 basic overloadable functions. + */ +/** + * xmlFreeFunc: + * @mem: an already allocated block of memory + * + * Signature for a free() implementation. + */ +typedef void (*xmlFreeFunc)(void *mem); +/** + * xmlMallocFunc: + * @size: the size requested in bytes + * + * Signature for a malloc() implementation. + * + * Returns a pointer to the newly allocated block or NULL in case of error. + */ +typedef void *(*xmlMallocFunc)(size_t size); + +/** + * xmlReallocFunc: + * @mem: an already allocated block of memory + * @size: the new size requested in bytes + * + * Signature for a realloc() implementation. + * + * Returns a pointer to the newly reallocated block or NULL in case of error. + */ +typedef void *(*xmlReallocFunc)(void *mem, size_t size); + +/** + * xmlStrdupFunc: + * @str: a zero terminated string + * + * Signature for an strdup() implementation. + * + * Returns the copy of the string or NULL in case of error. + */ +typedef char *(*xmlStrdupFunc)(const char *str); + +/* + * The 4 interfaces used for all memory handling within libxml. +LIBXML_DLL_IMPORT extern xmlFreeFunc xmlFree; +LIBXML_DLL_IMPORT extern xmlMallocFunc xmlMalloc; +LIBXML_DLL_IMPORT extern xmlReallocFunc xmlRealloc; +LIBXML_DLL_IMPORT extern xmlStrdupFunc xmlMemStrdup; + */ + +/* + * The way to overload the existing functions. 
+ */ +int xmlMemSetup (xmlFreeFunc freeFunc, + xmlMallocFunc mallocFunc, + xmlReallocFunc reallocFunc, + xmlStrdupFunc strdupFunc); +int xmlMemGet (xmlFreeFunc *freeFunc, + xmlMallocFunc *mallocFunc, + xmlReallocFunc *reallocFunc, + xmlStrdupFunc *strdupFunc); + +/* + * Initialization of the memory layer. + */ +int xmlInitMemory (void); + +/* + * Those are specific to the XML debug memory wrapper. + */ +int xmlMemUsed (void); +void xmlMemDisplay (FILE *fp); +void xmlMemShow (FILE *fp, int nr); +void xmlMemoryDump (void); +void * xmlMemMalloc (size_t size); +void * xmlMemRealloc (void *ptr,size_t size); +void xmlMemFree (void *ptr); +char * xmlMemoryStrdup (const char *str); + +#ifdef DEBUG_MEMORY_LOCATION +/** + * xmlMalloc: + * @size: number of bytes to allocate + * + * Wrapper for the malloc() function used in the XML library. + * + * Returns the pointer to the allocated area or NULL in case of error. + */ +#define xmlMalloc(size) xmlMallocLoc((size), __FILE__, __LINE__) +/** + * xmlRealloc: + * @ptr: pointer to the existing allocated area + * @size: number of bytes to allocate + * + * Wrapper for the realloc() function used in the XML library. + * + * Returns the pointer to the allocated area or NULL in case of error. + */ +#define xmlRealloc(ptr, size) xmlReallocLoc((ptr), (size), __FILE__, __LINE__) +/** + * xmlMemStrdup: + * @str: pointer to the existing string + * + * Wrapper for the strdup() function, xmlStrdup() is usually preferred. + * + * Returns the pointer to the allocated area or NULL in case of error. 
+ */ +#define xmlMemStrdup(str) xmlMemStrdupLoc((str), __FILE__, __LINE__) + +void * xmlMallocLoc(size_t size, const char *file, int line); +void * xmlReallocLoc(void *ptr,size_t size, const char *file, int line); +char * xmlMemStrdupLoc(const char *str, const char *file, int line); +#endif /* DEBUG_MEMORY_LOCATION */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#ifndef __XML_GLOBALS_H +#ifndef __XML_THREADS_H__ +#include <libxml/threads.h> +#include <libxml/globals.h> +#endif +#endif + +#endif /* _DEBUG_MEMORY_ALLOC_ */ + diff --git a/bundle/libxml/include/libxml/xmlreader.h b/bundle/libxml/include/libxml/xmlreader.h new file mode 100644 index 0000000000..8e4b05783b --- /dev/null +++ b/bundle/libxml/include/libxml/xmlreader.h @@ -0,0 +1,56 @@ +/* + * xmlreader.h : Interfaces, constants and types of the XML streaming API. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_XMLREADER_H__ +#define __XML_XMLREADER_H__ + +#include <libxml/tree.h> +#include <libxml/xmlIO.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlTextReader xmlTextReader; +typedef xmlTextReader *xmlTextReaderPtr; + +/* + * Constructors & Destructor + */ +xmlTextReaderPtr xmlNewTextReader (xmlParserInputBufferPtr input); +xmlTextReaderPtr xmlNewTextReaderFilename(const char *URI); +void xmlFreeTextReader (xmlTextReaderPtr reader); + +/* + * Iterators + */ +int xmlTextReaderRead (xmlTextReaderPtr reader); + +/* + * Attributes of the node + */ +int xmlTextReaderAttributeCount(xmlTextReaderPtr reader); +xmlChar * xmlTextReaderBaseUri (xmlTextReaderPtr reader); +int xmlTextReaderDepth (xmlTextReaderPtr reader); +int xmlTextReaderHasAttributes(xmlTextReaderPtr reader); +int xmlTextReaderHasValue(xmlTextReaderPtr reader); +int xmlTextReaderIsDefault (xmlTextReaderPtr reader); +int xmlTextReaderIsEmptyElement(xmlTextReaderPtr reader); +xmlChar * xmlTextReaderLocalName (xmlTextReaderPtr reader); +xmlChar * xmlTextReaderName 
(xmlTextReaderPtr reader); +xmlChar * xmlTextReaderNamespaceUri(xmlTextReaderPtr reader); +int xmlTextReaderNodeType (xmlTextReaderPtr reader); +xmlChar * xmlTextReaderPrefix (xmlTextReaderPtr reader); +int xmlTextReaderQuoteChar (xmlTextReaderPtr reader); +xmlChar * xmlTextReaderValue (xmlTextReaderPtr reader); +xmlChar * xmlTextReaderXmlLang (xmlTextReaderPtr reader); +#ifdef __cplusplus +} +#endif +#endif /* __XML_XMLREADER_H__ */ + diff --git a/bundle/libxml/include/libxml/xmlregexp.h b/bundle/libxml/include/libxml/xmlregexp.h new file mode 100644 index 0000000000..434b7a2bd9 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlregexp.h @@ -0,0 +1,81 @@ +/* + * xmlregexp.h : describes the basic API for libxml regular expressions handling + * + * See Copyright for the status of this software. + * + * Daniel Veillard <veillard@redhat.com> + */ + +#ifndef __XML_REGEXP_H__ +#define __XML_REGEXP_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_REGEXP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlRegexpPtr: + * + * A libxml regular expression, they can actually be far more complex + * than POSIX regular expressions. 
+ */ +typedef struct _xmlRegexp xmlRegexp; +typedef xmlRegexp *xmlRegexpPtr; + +/** + * xmlRegExecCtxtPtr: + * + * A libxml progressive regular expression evaluation context + */ +typedef struct _xmlRegExecCtxt xmlRegExecCtxt; +typedef xmlRegExecCtxt *xmlRegExecCtxtPtr; + +#ifdef __cplusplus +} +#endif +#include <libxml/tree.h> +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The POSIX like API + */ +xmlRegexpPtr xmlRegexpCompile(const xmlChar *regexp); +void xmlRegFreeRegexp(xmlRegexpPtr regexp); +int xmlRegexpExec (xmlRegexpPtr comp, + const xmlChar *value); +void xmlRegexpPrint (FILE *output, + xmlRegexpPtr regexp); +int xmlRegexpIsDeterminist(xmlRegexpPtr comp); + +/* + * Callback function when doing a transition in the automata + */ +typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec, + const xmlChar *token, + void *transdata, + void *inputdata); + +/* + * The progressive API + */ +xmlRegExecCtxtPtr xmlRegNewExecCtxt (xmlRegexpPtr comp, + xmlRegExecCallbacks callback, + void *data); +void xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec); +int xmlRegExecPushString (xmlRegExecCtxtPtr exec, + const xmlChar *value, + void *data); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_REGEXP_ENABLED */ + +#endif /*__XML_REGEXP_H__ */ diff --git a/bundle/libxml/include/libxml/xmlschemas.h b/bundle/libxml/include/libxml/xmlschemas.h new file mode 100644 index 0000000000..14b9230832 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlschemas.h @@ -0,0 +1,106 @@ +/* + * schemas.h : interface to the XML Schemas handling and schema validity + * checking + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_SCHEMA_H__ +#define __XML_SCHEMA_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <libxml/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_SCHEMAS_ERR_OK = 0, + XML_SCHEMAS_ERR_NOROOT = 1, + XML_SCHEMAS_ERR_UNDECLAREDELEM, + XML_SCHEMAS_ERR_NOTTOPLEVEL, + XML_SCHEMAS_ERR_MISSING, + XML_SCHEMAS_ERR_WRONGELEM, + XML_SCHEMAS_ERR_NOTYPE, + XML_SCHEMAS_ERR_NOROLLBACK, + XML_SCHEMAS_ERR_ISABSTRACT, + XML_SCHEMAS_ERR_NOTEMPTY, + XML_SCHEMAS_ERR_ELEMCONT, + XML_SCHEMAS_ERR_HAVEDEFAULT, + XML_SCHEMAS_ERR_NOTNILLABLE, + XML_SCHEMAS_ERR_EXTRACONTENT, + XML_SCHEMAS_ERR_INVALIDATTR, + XML_SCHEMAS_ERR_INVALIDELEM, + XML_SCHEMAS_ERR_NOTDETERMINIST, + XML_SCHEMAS_ERR_CONSTRUCT, + XML_SCHEMAS_ERR_INTERNAL, + XML_SCHEMAS_ERR_NOTSIMPLE, + XML_SCHEMAS_ERR_ATTRUNKNOWN, + XML_SCHEMAS_ERR_ATTRINVALID, + XML_SCHEMAS_ERR_, + XML_SCHEMAS_ERR_XXX +} xmlSchemaValidError; + + +/** + * The schemas related types are kept internal + */ +typedef struct _xmlSchema xmlSchema; +typedef xmlSchema *xmlSchemaPtr; + +/** + * A schemas validation context + */ +typedef void (*xmlSchemaValidityErrorFunc) (void *ctx, const char *msg, ...); +typedef void (*xmlSchemaValidityWarningFunc) (void *ctx, const char *msg, ...); + +typedef struct _xmlSchemaParserCtxt xmlSchemaParserCtxt; +typedef xmlSchemaParserCtxt *xmlSchemaParserCtxtPtr; + +typedef struct _xmlSchemaValidCtxt xmlSchemaValidCtxt; +typedef xmlSchemaValidCtxt *xmlSchemaValidCtxtPtr; + +/* + * Interfaces for parsing. 
+ */ +xmlSchemaParserCtxtPtr xmlSchemaNewParserCtxt (const char *URL); +xmlSchemaParserCtxtPtr xmlSchemaNewMemParserCtxt(const char *buffer, + int size); +void xmlSchemaFreeParserCtxt (xmlSchemaParserCtxtPtr ctxt); +void xmlSchemaSetParserErrors(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, + void *ctx); +xmlSchemaPtr xmlSchemaParse (xmlSchemaParserCtxtPtr ctxt); +void xmlSchemaFree (xmlSchemaPtr schema); +void xmlSchemaDump (FILE *output, + xmlSchemaPtr schema); +/* + * Interfaces for validating + */ +void xmlSchemaSetValidErrors (xmlSchemaValidCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, + void *ctx); +xmlSchemaValidCtxtPtr xmlSchemaNewValidCtxt (xmlSchemaPtr schema); +void xmlSchemaFreeValidCtxt (xmlSchemaValidCtxtPtr ctxt); +int xmlSchemaValidateDoc (xmlSchemaValidCtxtPtr ctxt, + xmlDocPtr instance); +int xmlSchemaValidateStream (xmlSchemaValidCtxtPtr ctxt, + xmlParserInputBufferPtr input, + xmlCharEncoding enc, + xmlSAXHandlerPtr sax, + void *user_data); +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_H__ */ diff --git a/bundle/libxml/include/libxml/xmlschemastypes.h b/bundle/libxml/include/libxml/xmlschemastypes.h new file mode 100644 index 0000000000..a758c128e5 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlschemastypes.h @@ -0,0 +1,42 @@ +/* + * schemastypes.c : interface of the XML Schema Datatypes + * definition and validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard <veillard@redhat.com> + */ + + +#ifndef __XML_SCHEMA_TYPES_H__ +#define __XML_SCHEMA_TYPES_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <libxml/schemasInternals.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void xmlSchemaInitTypes (void); +void xmlSchemaCleanupTypes (void); +xmlSchemaTypePtr xmlSchemaGetPredefinedType (const xmlChar *name, + const xmlChar *ns); +int xmlSchemaValidatePredefinedType (xmlSchemaTypePtr type, + const xmlChar *value, + xmlSchemaValPtr *val); +int xmlSchemaValidateFacet (xmlSchemaTypePtr base, + xmlSchemaFacetPtr facet, + const xmlChar *value, + xmlSchemaValPtr val); +void xmlSchemaFreeValue (xmlSchemaValPtr val); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_TYPES_H__ */ diff --git a/bundle/libxml/include/libxml/xmlunicode.h b/bundle/libxml/include/libxml/xmlunicode.h new file mode 100644 index 0000000000..f0f1fe9ce9 --- /dev/null +++ b/bundle/libxml/include/libxml/xmlunicode.h @@ -0,0 +1,164 @@ +/* + * xmlunicode.h: this header exports interfaces for the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html + * using the genUnicode.py Python script. 
+ * + * Generation date: Tue Apr 16 17:28:05 2002 + * Sources: Blocks-4.txt UnicodeData-3.1.0.txt + * Daniel Veillard <veillard@redhat.com> + */ + +#ifndef __XML_UNICODE_H__ +#define __XML_UNICODE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +int xmlUCSIsAlphabeticPresentationForms (int code); +int xmlUCSIsArabic (int code); +int xmlUCSIsArabicPresentationFormsA (int code); +int xmlUCSIsArabicPresentationFormsB (int code); +int xmlUCSIsArmenian (int code); +int xmlUCSIsArrows (int code); +int xmlUCSIsBasicLatin (int code); +int xmlUCSIsBengali (int code); +int xmlUCSIsBlockElements (int code); +int xmlUCSIsBopomofo (int code); +int xmlUCSIsBopomofoExtended (int code); +int xmlUCSIsBoxDrawing (int code); +int xmlUCSIsBraillePatterns (int code); +int xmlUCSIsByzantineMusicalSymbols (int code); +int xmlUCSIsCJKCompatibility (int code); +int xmlUCSIsCJKCompatibilityForms (int code); +int xmlUCSIsCJKCompatibilityIdeographs (int code); +int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code); +int xmlUCSIsCJKRadicalsSupplement (int code); +int xmlUCSIsCJKSymbolsandPunctuation (int code); +int xmlUCSIsCJKUnifiedIdeographs (int code); +int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code); +int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code); +int xmlUCSIsCherokee (int code); +int xmlUCSIsCombiningDiacriticalMarks (int code); +int xmlUCSIsCombiningHalfMarks (int code); +int xmlUCSIsCombiningMarksforSymbols (int code); +int xmlUCSIsControlPictures (int code); +int xmlUCSIsCurrencySymbols (int code); +int xmlUCSIsCyrillic (int code); +int xmlUCSIsDeseret (int code); +int xmlUCSIsDevanagari (int code); +int xmlUCSIsDingbats (int code); +int xmlUCSIsEnclosedAlphanumerics (int code); +int xmlUCSIsEnclosedCJKLettersandMonths (int code); +int xmlUCSIsEthiopic (int code); +int xmlUCSIsGeneralPunctuation (int code); +int xmlUCSIsGeometricShapes (int code); +int xmlUCSIsGeorgian (int code); +int xmlUCSIsGothic (int code); +int xmlUCSIsGreek (int code); +int 
xmlUCSIsGreekExtended (int code); +int xmlUCSIsGujarati (int code); +int xmlUCSIsGurmukhi (int code); +int xmlUCSIsHalfwidthandFullwidthForms (int code); +int xmlUCSIsHangulCompatibilityJamo (int code); +int xmlUCSIsHangulJamo (int code); +int xmlUCSIsHangulSyllables (int code); +int xmlUCSIsHebrew (int code); +int xmlUCSIsHighPrivateUseSurrogates (int code); +int xmlUCSIsHighSurrogates (int code); +int xmlUCSIsHiragana (int code); +int xmlUCSIsIPAExtensions (int code); +int xmlUCSIsIdeographicDescriptionCharacters (int code); +int xmlUCSIsKanbun (int code); +int xmlUCSIsKangxiRadicals (int code); +int xmlUCSIsKannada (int code); +int xmlUCSIsKatakana (int code); +int xmlUCSIsKhmer (int code); +int xmlUCSIsLao (int code); +int xmlUCSIsLatin1Supplement (int code); +int xmlUCSIsLatinExtendedA (int code); +int xmlUCSIsLatinExtendedB (int code); +int xmlUCSIsLatinExtendedAdditional (int code); +int xmlUCSIsLetterlikeSymbols (int code); +int xmlUCSIsLowSurrogates (int code); +int xmlUCSIsMalayalam (int code); +int xmlUCSIsMathematicalAlphanumericSymbols (int code); +int xmlUCSIsMathematicalOperators (int code); +int xmlUCSIsMiscellaneousSymbols (int code); +int xmlUCSIsMiscellaneousTechnical (int code); +int xmlUCSIsMongolian (int code); +int xmlUCSIsMusicalSymbols (int code); +int xmlUCSIsMyanmar (int code); +int xmlUCSIsNumberForms (int code); +int xmlUCSIsOgham (int code); +int xmlUCSIsOldItalic (int code); +int xmlUCSIsOpticalCharacterRecognition (int code); +int xmlUCSIsOriya (int code); +int xmlUCSIsPrivateUse (int code); +int xmlUCSIsRunic (int code); +int xmlUCSIsSinhala (int code); +int xmlUCSIsSmallFormVariants (int code); +int xmlUCSIsSpacingModifierLetters (int code); +int xmlUCSIsSpecials (int code); +int xmlUCSIsSuperscriptsandSubscripts (int code); +int xmlUCSIsSyriac (int code); +int xmlUCSIsTags (int code); +int xmlUCSIsTamil (int code); +int xmlUCSIsTelugu (int code); +int xmlUCSIsThaana (int code); +int xmlUCSIsThai (int code); +int xmlUCSIsTibetan 
(int code); +int xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code); +int xmlUCSIsYiRadicals (int code); +int xmlUCSIsYiSyllables (int code); + +int xmlUCSIsBlock (int code, + const char *block); + +int xmlUCSIsCatC (int code); +int xmlUCSIsCatCc (int code); +int xmlUCSIsCatCf (int code); +int xmlUCSIsCatCo (int code); +int xmlUCSIsCatCs (int code); +int xmlUCSIsCatL (int code); +int xmlUCSIsCatLl (int code); +int xmlUCSIsCatLm (int code); +int xmlUCSIsCatLo (int code); +int xmlUCSIsCatLt (int code); +int xmlUCSIsCatLu (int code); +int xmlUCSIsCatM (int code); +int xmlUCSIsCatMc (int code); +int xmlUCSIsCatMe (int code); +int xmlUCSIsCatMn (int code); +int xmlUCSIsCatN (int code); +int xmlUCSIsCatNd (int code); +int xmlUCSIsCatNl (int code); +int xmlUCSIsCatNo (int code); +int xmlUCSIsCatP (int code); +int xmlUCSIsCatPc (int code); +int xmlUCSIsCatPd (int code); +int xmlUCSIsCatPe (int code); +int xmlUCSIsCatPf (int code); +int xmlUCSIsCatPi (int code); +int xmlUCSIsCatPo (int code); +int xmlUCSIsCatPs (int code); +int xmlUCSIsCatS (int code); +int xmlUCSIsCatSc (int code); +int xmlUCSIsCatSk (int code); +int xmlUCSIsCatSm (int code); +int xmlUCSIsCatSo (int code); +int xmlUCSIsCatZ (int code); +int xmlUCSIsCatZl (int code); +int xmlUCSIsCatZp (int code); +int xmlUCSIsCatZs (int code); + +int xmlUCSIsCat (int code, + const char *cat); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_UNICODE_H__ */ diff --git a/bundle/libxml/include/libxml/xmlversion.h b/bundle/libxml/include/libxml/xmlversion.h new file mode 100644 index 0000000000..6ec30a48cb --- /dev/null +++ b/bundle/libxml/include/libxml/xmlversion.h @@ -0,0 +1,272 @@ +/* + * xmlversion.h : compile-time version informations for the XML parser. + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +#ifndef LIBXML2_COMPILING_MSCCDEF +extern void xmlCheckVersion(int version); +#endif /* LIBXML2_COMPILING_MSCCDEF */ + +/** + * LIBXML_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBXML_DOTTED_VERSION "2.4.30" + +/** + * LIBXML_VERSION: + * + * the version number: 1.2.3 value is 1002003 + */ +#define LIBXML_VERSION 20430 + +/** + * LIBXML_VERSION_STRING: + * + * the version number string, 1.2.3 value is "1002003" + */ +#define LIBXML_VERSION_STRING "20430" + +/** + * LIBXML_TEST_VERSION: + * + * Macro to check that the libxml version in use is compatible with + * the version the software has been compiled against + */ +#define LIBXML_TEST_VERSION xmlCheckVersion(20430); + +#ifndef VMS +#if 0 +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO +#else +/** + * WITHOUT_TRIO: + * + * defined if the trio support should not be configured in + */ +#define WITHOUT_TRIO +#endif +#else /* VMS */ +#define WITH_TRIO 1 +#endif /* VMS */ + +/** + * LIBXML_THREAD_ENABLED: + * + * Whether the thread support is configured in + */ +#if 0 +#if defined(_REENTRANT) || (_POSIX_C_SOURCE - 0 >= 199506L) +#define LIBXML_THREAD_ENABLED +#endif +#endif + +/** + * LIBXML_FTP_ENABLED: + * + * Whether the FTP support is configured in + */ +#if 1 +#define LIBXML_FTP_ENABLED +#endif + +/** + * LIBXML_HTTP_ENABLED: + * + * Whether the HTTP support is configured in + */ +#if 1 +#define LIBXML_HTTP_ENABLED +#endif + +/** + * LIBXML_HTML_ENABLED: + * + * Whether the HTML support is configured in + */ +#if 1 +#define LIBXML_HTML_ENABLED +#endif + +/** + * LIBXML_C14N_ENABLED: + * + * Whether the Canonicalization support is configured in + */ +#if 1 +#define LIBXML_C14N_ENABLED +#endif + 
+/** + * LIBXML_CATALOG_ENABLED: + * + * Whether the Catalog support is configured in + */ +#if 1 +#define LIBXML_CATALOG_ENABLED +#endif + +/** + * LIBXML_DOCB_ENABLED: + * + * Whether the SGML Docbook support is configured in + */ +#if 1 +#define LIBXML_DOCB_ENABLED +#endif + +/** + * LIBXML_XPATH_ENABLED: + * + * Whether XPath is configured in + */ +#if 1 +#define LIBXML_XPATH_ENABLED +#endif + +/** + * LIBXML_XPTR_ENABLED: + * + * Whether XPointer is configured in + */ +#if 1 +#define LIBXML_XPTR_ENABLED +#endif + +/** + * LIBXML_XINCLUDE_ENABLED: + * + * Whether XInclude is configured in + */ +#if 1 +#define LIBXML_XINCLUDE_ENABLED +#endif + +/** + * LIBXML_ICONV_ENABLED: + * + * Whether iconv support is available + */ +#if 1 +#define LIBXML_ICONV_ENABLED +#endif + +/** + * LIBXML_DEBUG_ENABLED: + * + * Whether Debugging module is configured in + */ +#if 1 +#define LIBXML_DEBUG_ENABLED +#endif + +/** + * DEBUG_MEMORY_LOCATION: + * + * Whether the memory debugging is configured in + */ +#if 1 +#define DEBUG_MEMORY_LOCATION +#endif + +/** + * LIBXML_UNICODE_ENABLED + * + * Whether the Unicode related interfaces are compiled in + */ +#if 1 +#define LIBXML_UNICODE_ENABLED +#endif + +/** + * LIBXML_REGEXP_ENABLED + * + * Whether the regular expressions interfaces are compiled in + */ +#if 1 +#define LIBXML_REGEXP_ENABLED +#endif + +/** + * LIBXML_AUTOMATA_ENABLED + * + * Whether the automata interfaces are compiled in + */ +#if 1 +#define LIBXML_AUTOMATA_ENABLED +#endif + +/** + * LIBXML_SCHEMAS_ENABLED + * + * Whether the Schemas validation interfaces are compiled in + */ +#if 1 +#define LIBXML_SCHEMAS_ENABLED +#endif + +/** + * LIBXML_DLL_IMPORT: + * + * Used on Windows (MS C compiler only) to declare a variable as + * imported from the library. This macro should be empty when compiling + * libxml itself. It should expand to __declspec(dllimport) + * when the client code includes this header, and that only if the client + * links dynamically against libxml. 
+ * For this to work, we need three macros. One tells us which compiler is + * being used and luckily the compiler defines such a thing: _MSC_VER. The + * second macro tells us if we are compiling libxml or the client code and + * we define the macro IN_LIBXML on the compiler's command line for this + * purpose. The third macro, LIBXML_STATIC, must be defined by any client + * code which links against libxml statically. + */ +#ifndef LIBXML_DLL_IMPORT +#if (defined(_MSC_VER) || defined(__CYGWIN__)) && !defined(IN_LIBXML) && !defined(LIBXML_STATIC) +#define LIBXML_DLL_IMPORT __declspec(dllimport) +#else +#define LIBXML_DLL_IMPORT +#endif +#endif + +/** + * ATTRIBUTE_UNUSED: + * + * Macro used to signal to GCC unused function parameters + */ +#ifdef __GNUC__ +#ifdef HAVE_ANSIDECL_H +#include <ansidecl.h> +#endif +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED +#endif +#else +#define ATTRIBUTE_UNUSED +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/bundle/libxml/include/libxml/xmlversion.h.in b/bundle/libxml/include/libxml/xmlversion.h.in new file mode 100644 index 0000000000..94def1804b --- /dev/null +++ b/bundle/libxml/include/libxml/xmlversion.h.in @@ -0,0 +1,272 @@ +/* + * xmlversion.h : compile-time version informations for the XML parser. + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +#ifndef LIBXML2_COMPILING_MSCCDEF +extern void xmlCheckVersion(int version); +#endif /* LIBXML2_COMPILING_MSCCDEF */ + +/** + * LIBXML_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBXML_DOTTED_VERSION "@VERSION@" + +/** + * LIBXML_VERSION: + * + * the version number: 1.2.3 value is 1002003 + */ +#define LIBXML_VERSION @LIBXML_VERSION_NUMBER@ + +/** + * LIBXML_VERSION_STRING: + * + * the version number string, 1.2.3 value is "1002003" + */ +#define LIBXML_VERSION_STRING "@LIBXML_VERSION_NUMBER@" + +/** + * LIBXML_TEST_VERSION: + * + * Macro to check that the libxml version in use is compatible with + * the version the software has been compiled against + */ +#define LIBXML_TEST_VERSION xmlCheckVersion(@LIBXML_VERSION_NUMBER@); + +#ifndef VMS +#if @WITH_TRIO@ +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO +#else +/** + * WITHOUT_TRIO: + * + * defined if the trio support should not be configured in + */ +#define WITHOUT_TRIO +#endif +#else /* VMS */ +#define WITH_TRIO 1 +#endif /* VMS */ + +/** + * LIBXML_THREAD_ENABLED: + * + * Whether the thread support is configured in + */ +#if @WITH_THREADS@ +#if defined(_REENTRANT) || (_POSIX_C_SOURCE - 0 >= 199506L) +#define LIBXML_THREAD_ENABLED +#endif +#endif + +/** + * LIBXML_FTP_ENABLED: + * + * Whether the FTP support is configured in + */ +#if @WITH_FTP@ +#define LIBXML_FTP_ENABLED +#endif + +/** + * LIBXML_HTTP_ENABLED: + * + * Whether the HTTP support is configured in + */ +#if @WITH_HTTP@ +#define LIBXML_HTTP_ENABLED +#endif + +/** + * LIBXML_HTML_ENABLED: + * + * Whether the HTML support is configured in + */ +#if @WITH_HTML@ +#define LIBXML_HTML_ENABLED +#endif + +/** + * LIBXML_C14N_ENABLED: + * + * 
Whether the Canonicalization support is configured in + */ +#if @WITH_C14N@ +#define LIBXML_C14N_ENABLED +#endif + +/** + * LIBXML_CATALOG_ENABLED: + * + * Whether the Catalog support is configured in + */ +#if @WITH_CATALOG@ +#define LIBXML_CATALOG_ENABLED +#endif + +/** + * LIBXML_DOCB_ENABLED: + * + * Whether the SGML Docbook support is configured in + */ +#if @WITH_DOCB@ +#define LIBXML_DOCB_ENABLED +#endif + +/** + * LIBXML_XPATH_ENABLED: + * + * Whether XPath is configured in + */ +#if @WITH_XPATH@ +#define LIBXML_XPATH_ENABLED +#endif + +/** + * LIBXML_XPTR_ENABLED: + * + * Whether XPointer is configured in + */ +#if @WITH_XPTR@ +#define LIBXML_XPTR_ENABLED +#endif + +/** + * LIBXML_XINCLUDE_ENABLED: + * + * Whether XInclude is configured in + */ +#if @WITH_XINCLUDE@ +#define LIBXML_XINCLUDE_ENABLED +#endif + +/** + * LIBXML_ICONV_ENABLED: + * + * Whether iconv support is available + */ +#if @WITH_ICONV@ +#define LIBXML_ICONV_ENABLED +#endif + +/** + * LIBXML_DEBUG_ENABLED: + * + * Whether Debugging module is configured in + */ +#if @WITH_DEBUG@ +#define LIBXML_DEBUG_ENABLED +#endif + +/** + * DEBUG_MEMORY_LOCATION: + * + * Whether the memory debugging is configured in + */ +#if @WITH_MEM_DEBUG@ +#define DEBUG_MEMORY_LOCATION +#endif + +/** + * LIBXML_UNICODE_ENABLED + * + * Whether the Unicode related interfaces are compiled in + */ +#if @WITH_REGEXPS@ +#define LIBXML_UNICODE_ENABLED +#endif + +/** + * LIBXML_REGEXP_ENABLED + * + * Whether the regular expressions interfaces are compiled in + */ +#if @WITH_REGEXPS@ +#define LIBXML_REGEXP_ENABLED +#endif + +/** + * LIBXML_AUTOMATA_ENABLED + * + * Whether the automata interfaces are compiled in + */ +#if @WITH_REGEXPS@ +#define LIBXML_AUTOMATA_ENABLED +#endif + +/** + * LIBXML_SCHEMAS_ENABLED + * + * Whether the Schemas validation interfaces are compiled in + */ +#if @WITH_SCHEMAS@ +#define LIBXML_SCHEMAS_ENABLED +#endif + +/** + * LIBXML_DLL_IMPORT: + * + * Used on Windows (MS C compiler only) to declare a 
variable as + * imported from the library. This macro should be empty when compiling + * libxml itself. It should expand to __declspec(dllimport) + * when the client code includes this header, and that only if the client + * links dynamically against libxml. + * For this to work, we need three macros. One tells us which compiler is + * being used and luckily the compiler defines such a thing: _MSC_VER. The + * second macro tells us if we are compiling libxml or the client code and + * we define the macro IN_LIBXML on the compiler's command line for this + * purpose. The third macro, LIBXML_STATIC, must be defined by any client + * code which links against libxml statically. + */ +#ifndef LIBXML_DLL_IMPORT +#if (defined(_MSC_VER) || defined(__CYGWIN__)) && !defined(IN_LIBXML) && !defined(LIBXML_STATIC) +#define LIBXML_DLL_IMPORT __declspec(dllimport) +#else +#define LIBXML_DLL_IMPORT +#endif +#endif + +/** + * ATTRIBUTE_UNUSED: + * + * Macro used to signal to GCC unused function parameters + */ +#ifdef __GNUC__ +#ifdef HAVE_ANSIDECL_H +#include <ansidecl.h> +#endif +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED +#endif +#else +#define ATTRIBUTE_UNUSED +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/bundle/libxml/include/libxml/xpath.h b/bundle/libxml/include/libxml/xpath.h new file mode 100644 index 0000000000..f36b16f938 --- /dev/null +++ b/bundle/libxml/include/libxml/xpath.h @@ -0,0 +1,410 @@ +/* + * xpath.c: interface for XML Path Language implementation + * + * Reference: W3C Working Draft 5 July 1999 + * http://www.w3.org/Style/XSL/Group/1999/07/xpath-19990705.html + * + * See COPYRIGHT for the status of this software + * + * Author: daniel@veillard.com + */ + +#ifndef __XML_XPATH_H__ +#define __XML_XPATH_H__ + +#include <libxml/tree.h> +#include <libxml/hash.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlXPathContext xmlXPathContext; +typedef xmlXPathContext *xmlXPathContextPtr; +typedef struct 
_xmlXPathParserContext xmlXPathParserContext; +typedef xmlXPathParserContext *xmlXPathParserContextPtr; + +/** + * The set of XPath error codes. + */ + +typedef enum { + XPATH_EXPRESSION_OK = 0, + XPATH_NUMBER_ERROR, + XPATH_UNFINISHED_LITERAL_ERROR, + XPATH_START_LITERAL_ERROR, + XPATH_VARIABLE_REF_ERROR, + XPATH_UNDEF_VARIABLE_ERROR, + XPATH_INVALID_PREDICATE_ERROR, + XPATH_EXPR_ERROR, + XPATH_UNCLOSED_ERROR, + XPATH_UNKNOWN_FUNC_ERROR, + XPATH_INVALID_OPERAND, + XPATH_INVALID_TYPE, + XPATH_INVALID_ARITY, + XPATH_INVALID_CTXT_SIZE, + XPATH_INVALID_CTXT_POSITION, + XPATH_MEMORY_ERROR, + XPTR_SYNTAX_ERROR, + XPTR_RESOURCE_ERROR, + XPTR_SUB_RESOURCE_ERROR, + XPATH_UNDEF_PREFIX_ERROR, + XPATH_ENCODING_ERROR, + XPATH_INVALID_CHAR_ERROR +} xmlXPathError; + +/* + * A node-set (an unordered collection of nodes without duplicates). + */ +typedef struct _xmlNodeSet xmlNodeSet; +typedef xmlNodeSet *xmlNodeSetPtr; +struct _xmlNodeSet { + int nodeNr; /* number of nodes in the set */ + int nodeMax; /* size of the array as allocated */ + xmlNodePtr *nodeTab; /* array of nodes in no particular order */ + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ +}; + +/* + * An expression is evaluated to yield an object, which + * has one of the following four basic types: + * - node-set + * - boolean + * - number + * - string + * + * @@ XPointer will add more types ! 
+ */ + +typedef enum { + XPATH_UNDEFINED = 0, + XPATH_NODESET = 1, + XPATH_BOOLEAN = 2, + XPATH_NUMBER = 3, + XPATH_STRING = 4, + XPATH_POINT = 5, + XPATH_RANGE = 6, + XPATH_LOCATIONSET = 7, + XPATH_USERS = 8, + XPATH_XSLT_TREE = 9 /* An XSLT value tree, non modifiable */ +} xmlXPathObjectType; + +typedef struct _xmlXPathObject xmlXPathObject; +typedef xmlXPathObject *xmlXPathObjectPtr; +struct _xmlXPathObject { + xmlXPathObjectType type; + xmlNodeSetPtr nodesetval; + int boolval; + double floatval; + xmlChar *stringval; + void *user; + int index; + void *user2; + int index2; +}; + +/** + * xmlXPathConvertFunc: + * @obj: an XPath object + * @type: the number of the target type + * + * A conversion function is associated to a type and used to cast + * the new type to primitive values. + * + * Returns -1 in case of error, 0 otherwise + */ +typedef int (*xmlXPathConvertFunc) (xmlXPathObjectPtr obj, int type); + +/* + * Extra type: a name and a conversion function. + */ + +typedef struct _xmlXPathType xmlXPathType; +typedef xmlXPathType *xmlXPathTypePtr; +struct _xmlXPathType { + const xmlChar *name; /* the type name */ + xmlXPathConvertFunc func; /* the conversion function */ +}; + +/* + * Extra variable: a name and a value. + */ + +typedef struct _xmlXPathVariable xmlXPathVariable; +typedef xmlXPathVariable *xmlXPathVariablePtr; +struct _xmlXPathVariable { + const xmlChar *name; /* the variable name */ + xmlXPathObjectPtr value; /* the value */ +}; + +/** + * xmlXPathEvalFunc: + * @ctxt: an XPath parser context + * @nargs: the number of arguments passed to the function + * + * An XPath evaluation function, the parameters are on the XPath context stack. + */ + +typedef void (*xmlXPathEvalFunc)(xmlXPathParserContextPtr ctxt, + int nargs); + +/* + * Extra function: a name and a evaluation function. 
+ */ + +typedef struct _xmlXPathFunct xmlXPathFunct; +typedef xmlXPathFunct *xmlXPathFuncPtr; +struct _xmlXPathFunct { + const xmlChar *name; /* the function name */ + xmlXPathEvalFunc func; /* the evaluation function */ +}; + +/** + * xmlXPathAxisFunc: + * @ctxt: the XPath interpreter context + * @cur: the previous node being explored on that axis + * + * An axis traversal function. To traverse an axis, the engine calls + * the first time with cur == NULL and repeats until the function returns + * NULL indicating the end of the axis traversal. + * + * Returns the next node in that axis or NULL if at the end of the axis. + */ + +typedef xmlXPathObjectPtr (*xmlXPathAxisFunc) (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr cur); + +/* + * Extra axis: a name and an axis function. + */ + +typedef struct _xmlXPathAxis xmlXPathAxis; +typedef xmlXPathAxis *xmlXPathAxisPtr; +struct _xmlXPathAxis { + const xmlChar *name; /* the axis name */ + xmlXPathAxisFunc func; /* the search function */ +}; + +/** + * xmlXPathContext: + * + * Expression evaluation occurs with respect to a context. + * The context consists of: + * - a node (the context node) + * - a node list (the context node list) + * - a set of variable bindings + * - a function library + * - the set of namespace declarations in scope for the expression + * Following the switch to hash tables, this needs to be trimmed up at + * the next binary incompatible release. 
+ */ + +struct _xmlXPathContext { + xmlDocPtr doc; /* The current document */ + xmlNodePtr node; /* The current node */ + + int nb_variables_unused; /* unused (hash table) */ + int max_variables_unused; /* unused (hash table) */ + xmlHashTablePtr varHash; /* Hash table of defined variables */ + + int nb_types; /* number of defined types */ + int max_types; /* max number of types */ + xmlXPathTypePtr types; /* Array of defined types */ + + int nb_funcs_unused; /* unused (hash table) */ + int max_funcs_unused; /* unused (hash table) */ + xmlHashTablePtr funcHash; /* Hash table of defined funcs */ + + int nb_axis; /* number of defined axis */ + int max_axis; /* max number of axis */ + xmlXPathAxisPtr axis; /* Array of defined axis */ + + /* the namespace nodes of the context node */ + xmlNsPtr *namespaces; /* Array of namespaces */ + int nsNr; /* number of namespace in scope */ + void *user; /* function to free */ + + /* extra variables */ + int contextSize; /* the context size */ + int proximityPosition; /* the proximity position */ + + /* extra stuff for XPointer */ + int xptr; /* it this an XPointer context */ + xmlNodePtr here; /* for here() */ + xmlNodePtr origin; /* for origin() */ + + /* the set of namespace declarations in scope for the expression */ + xmlHashTablePtr nsHash; /* The namespaces hash table */ + void *varLookupFunc; /* variable lookup func */ + void *varLookupData; /* variable lookup data */ + + /* Possibility to link in an extra item */ + void *extra; /* needed for XSLT */ + + /* The function name and URI when calling a function */ + const xmlChar *function; + const xmlChar *functionURI; + + /* function lookup function and data */ + void *funcLookupFunc; /* function lookup func */ + void *funcLookupData; /* function lookup data */ + + /* temporary namespace lists kept for walking the namespace axis */ + xmlNsPtr *tmpNsList; /* Array of namespaces */ + int tmpNsNr; /* number of namespace in scope */ +}; + +/* + * The structure of a compiled 
expression form is not public. + */ + +typedef struct _xmlXPathCompExpr xmlXPathCompExpr; +typedef xmlXPathCompExpr *xmlXPathCompExprPtr; + +/** + * xmlXPathParserContext: + * + * An XPath parser context. It contains pure parsing information, + * an xmlXPathContext, and the stack of objects. + */ +struct _xmlXPathParserContext { + const xmlChar *cur; /* the current char being parsed */ + const xmlChar *base; /* the full expression */ + + int error; /* error code */ + + xmlXPathContextPtr context; /* the evaluation context */ + xmlXPathObjectPtr value; /* the current value */ + int valueNr; /* number of values stacked */ + int valueMax; /* max number of values stacked */ + xmlXPathObjectPtr *valueTab; /* stack of values */ + + xmlXPathCompExprPtr comp; /* the precompiled expression */ + int xptr; /* is this an XPointer expression */ + xmlNodePtr ancestor; /* used for walking preceding axis */ +}; + +/** + * xmlXPathFunction: + * @ctxt: the XPath interpretation context + * @nargs: the number of arguments + * + * An XPath function. + * The arguments (if any) are popped out from the context stack + * and the result is pushed on the stack. + */ + +typedef void (*xmlXPathFunction) (xmlXPathParserContextPtr ctxt, int nargs); + +/************************************************************************ + * * + * Public API * + * * + ************************************************************************/ + +/** + * Objects and Nodesets handling + */ + +LIBXML_DLL_IMPORT extern double xmlXPathNAN; +LIBXML_DLL_IMPORT extern double xmlXPathPINF; +LIBXML_DLL_IMPORT extern double xmlXPathNINF; + +int xmlXPathIsNaN (double val); +int xmlXPathIsInf (double val); + +/* These macros may later turn into functions */ +/** + * xmlXPathNodeSetGetLength: + * @ns: a node-set + * + * Implement a functionality similar to the DOM NodeList.length. + * + * Returns the number of nodes in the node-set. + */ +#define xmlXPathNodeSetGetLength(ns) ((ns) ? 
(ns)->nodeNr : 0) +/** + * xmlXPathNodeSetItem: + * @ns: a node-set + * @index: index of a node in the set + * + * Implements a functionality similar to the DOM NodeList.item(). + * + * Returns the xmlNodePtr at the given @index in @ns or NULL if + * @index is out of range (0 to length-1) + */ +#define xmlXPathNodeSetItem(ns, index) \ + ((((ns) != NULL) && \ + ((index) >= 0) && ((index) < (ns)->nodeNr)) ? \ + (ns)->nodeTab[(index)] \ + : NULL) +/** + * xmlXPathNodeSetIsEmpty: + * @ns: a node-set + * + * Checks whether @ns is empty or not. + * + * Returns %TRUE if @ns is an empty node-set. + */ +#define xmlXPathNodeSetIsEmpty(ns) \ + (((ns) == NULL) || ((ns)->nodeNr == 0) || ((ns)->nodeTab == NULL)) + + +void xmlXPathFreeObject (xmlXPathObjectPtr obj); +xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val); +void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj); +void xmlXPathFreeNodeSet (xmlNodeSetPtr obj); +xmlXPathObjectPtr xmlXPathObjectCopy (xmlXPathObjectPtr val); +int xmlXPathCmpNodes (xmlNodePtr node1, + xmlNodePtr node2); +/** + * Conversion functions to basic types. 
+ */ +int xmlXPathCastNumberToBoolean (double val); +int xmlXPathCastStringToBoolean (const xmlChar * val); +int xmlXPathCastNodeSetToBoolean (xmlNodeSetPtr ns); +int xmlXPathCastToBoolean (xmlXPathObjectPtr val); + +double xmlXPathCastBooleanToNumber (int val); +double xmlXPathCastStringToNumber (const xmlChar * val); +double xmlXPathCastNodeToNumber (xmlNodePtr node); +double xmlXPathCastNodeSetToNumber (xmlNodeSetPtr ns); +double xmlXPathCastToNumber (xmlXPathObjectPtr val); + +xmlChar * xmlXPathCastBooleanToString (int val); +xmlChar * xmlXPathCastNumberToString (double val); +xmlChar * xmlXPathCastNodeToString (xmlNodePtr node); +xmlChar * xmlXPathCastNodeSetToString (xmlNodeSetPtr ns); +xmlChar * xmlXPathCastToString (xmlXPathObjectPtr val); + +xmlXPathObjectPtr xmlXPathConvertBoolean (xmlXPathObjectPtr val); +xmlXPathObjectPtr xmlXPathConvertNumber (xmlXPathObjectPtr val); +xmlXPathObjectPtr xmlXPathConvertString (xmlXPathObjectPtr val); + +/** + * Context handling. + */ +void xmlXPathInit (void); +xmlXPathContextPtr xmlXPathNewContext (xmlDocPtr doc); +void xmlXPathFreeContext (xmlXPathContextPtr ctxt); + +/** + * Evaluation functions. + */ +xmlXPathObjectPtr xmlXPathEval (const xmlChar *str, + xmlXPathContextPtr ctx); +xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str, + xmlXPathContextPtr ctxt); +int xmlXPathEvalPredicate (xmlXPathContextPtr ctxt, + xmlXPathObjectPtr res); +/** + * Separate compilation/evaluation entry points. + */ +xmlXPathCompExprPtr xmlXPathCompile (const xmlChar *str); +xmlXPathObjectPtr xmlXPathCompiledEval (xmlXPathCompExprPtr comp, + xmlXPathContextPtr ctx); +void xmlXPathFreeCompExpr (xmlXPathCompExprPtr comp); +#ifdef __cplusplus +} +#endif +#endif /* ! 
__XML_XPATH_H__ */ diff --git a/bundle/libxml/include/libxml/xpathInternals.h b/bundle/libxml/include/libxml/xpathInternals.h new file mode 100644 index 0000000000..59a4e35d53 --- /dev/null +++ b/bundle/libxml/include/libxml/xpathInternals.h @@ -0,0 +1,580 @@ +/* + * xpathInternals.c: internal interfaces for XML Path Language implementation + * used to build new modules on top of XPath + * + * See COPYRIGHT for the status of this software + * + * Author: daniel@veillard.com + */ + +#ifndef __XML_XPATH_INTERNALS_H__ +#define __XML_XPATH_INTERNALS_H__ + +#include <libxml/xmlversion.h> +#include <libxml/xpath.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/************************************************************************ + * * + * Helpers * + * * + ************************************************************************/ + +/** + * Many of these macros may later turn into functions. They + * shouldn't be used in #ifdef's preprocessor instructions. + */ +/** + * xmlXPathSetError: + * @ctxt: an XPath parser context + * @err: an xmlXPathError code + * + * Raises an error. + */ +#define xmlXPathSetError(ctxt, err) \ + { xmlXPatherror((ctxt), __FILE__, __LINE__, (err)); \ + (ctxt)->error = (err); } + +/** + * xmlXPathSetArityError: + * @ctxt: an XPath parser context + * + * Raises an XPATH_INVALID_ARITY error. + */ +#define xmlXPathSetArityError(ctxt) \ + xmlXPathSetError((ctxt), XPATH_INVALID_ARITY) + +/** + * xmlXPathSetTypeError: + * @ctxt: an XPath parser context + * + * Raises an XPATH_INVALID_TYPE error. + */ +#define xmlXPathSetTypeError(ctxt) \ + xmlXPathSetError((ctxt), XPATH_INVALID_TYPE) + +/** + * xmlXPathGetError: + * @ctxt: an XPath parser context + * + * Get the error code of an XPath context. + * + * Returns the context error. + */ +#define xmlXPathGetError(ctxt) ((ctxt)->error) + +/** + * xmlXPathCheckError: + * @ctxt: an XPath parser context + * + * Check if an XPath error was raised. 
+ * + * Returns true if an error has been raised, false otherwise. + */ +#define xmlXPathCheckError(ctxt) ((ctxt)->error != XPATH_EXPRESSION_OK) + +/** + * xmlXPathGetDocument: + * @ctxt: an XPath parser context + * + * Get the document of an XPath context. + * + * Returns the context document. + */ +#define xmlXPathGetDocument(ctxt) ((ctxt)->context->doc) + +/** + * xmlXPathGetContextNode: + * @ctxt: an XPath parser context + * + * Get the context node of an XPath context. + * + * Returns the context node. + */ +#define xmlXPathGetContextNode(ctxt) ((ctxt)->context->node) + +int xmlXPathPopBoolean (xmlXPathParserContextPtr ctxt); +double xmlXPathPopNumber (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathPopString (xmlXPathParserContextPtr ctxt); +xmlNodeSetPtr xmlXPathPopNodeSet (xmlXPathParserContextPtr ctxt); +void * xmlXPathPopExternal (xmlXPathParserContextPtr ctxt); + +/** + * xmlXPathReturnBoolean: + * @ctxt: an XPath parser context + * @val: a boolean + * + * Pushes the boolean @val on the context stack. + */ +#define xmlXPathReturnBoolean(ctxt, val) \ + valuePush((ctxt), xmlXPathNewBoolean(val)) + +/** + * xmlXPathReturnTrue: + * @ctxt: an XPath parser context + * + * Pushes true on the context stack. + */ +#define xmlXPathReturnTrue(ctxt) xmlXPathReturnBoolean((ctxt), 1) + +/** + * xmlXPathReturnFalse: + * @ctxt: an XPath parser context + * + * Pushes false on the context stack. + */ +#define xmlXPathReturnFalse(ctxt) xmlXPathReturnBoolean((ctxt), 0) + +/** + * xmlXPathReturnNumber: + * @ctxt: an XPath parser context + * @val: a double + * + * Pushes the double @val on the context stack. + */ +#define xmlXPathReturnNumber(ctxt, val) \ + valuePush((ctxt), xmlXPathNewFloat(val)) + +/** + * xmlXPathReturnString: + * @ctxt: an XPath parser context + * @str: a string + * + * Pushes the string @str on the context stack. 
+ */ +#define xmlXPathReturnString(ctxt, str) \ + valuePush((ctxt), xmlXPathWrapString(str)) + +/** + * xmlXPathReturnEmptyString: + * @ctxt: an XPath parser context + * + * Pushes an empty string on the stack. + */ +#define xmlXPathReturnEmptyString(ctxt) \ + valuePush((ctxt), xmlXPathNewCString("")) + +/** + * xmlXPathReturnNodeSet: + * @ctxt: an XPath parser context + * @ns: a node-set + * + * Pushes the node-set @ns on the context stack. + */ +#define xmlXPathReturnNodeSet(ctxt, ns) \ + valuePush((ctxt), xmlXPathWrapNodeSet(ns)) + +/** + * xmlXPathReturnEmptyNodeSet: + * @ctxt: an XPath parser context + * + * Pushes an empty node-set on the context stack. + */ +#define xmlXPathReturnEmptyNodeSet(ctxt) \ + valuePush((ctxt), xmlXPathNewNodeSet(NULL)) + +/** + * xmlXPathReturnExternal: + * @ctxt: an XPath parser context + * @val: user data + * + * Pushes user data on the context stack. + */ +#define xmlXPathReturnExternal(ctxt, val) \ + valuePush((ctxt), xmlXPathWrapExternal(val)) + +/** + * xmlXPathStackIsNodeSet: + * @ctxt: an XPath parser context + * + * Check if the current value on the XPath stack is a node set or + * an XSLT value tree. + * + * Returns true if the current object on the stack is a node-set. + */ +#define xmlXPathStackIsNodeSet(ctxt) \ + (((ctxt)->value != NULL) \ + && (((ctxt)->value->type == XPATH_NODESET) \ + || ((ctxt)->value->type == XPATH_XSLT_TREE))) + +/** + * xmlXPathStackIsExternal: + * @ctxt: an XPath parser context + * + * Checks if the current value on the XPath stack is an external + * object. The @ctxt argument is parenthesized in the expansion so that + * any pointer-valued expression can be passed safely. + * + * Returns true if the current object on the stack is an external + * object. + */ +#define xmlXPathStackIsExternal(ctxt) \ + (((ctxt)->value != NULL) && ((ctxt)->value->type == XPATH_USERS)) + +/** + * xmlXPathEmptyNodeSet: + * @ns: a node-set + * + * Empties a node-set. 
+ */ +#define xmlXPathEmptyNodeSet(ns) \ + { while ((ns)->nodeNr > 0) (ns)->nodeTab[(ns)->nodeNr--] = NULL; } + +/** + * CHECK_ERROR: + * + * Macro to return from the function if an XPath error was detected. + */ +#define CHECK_ERROR \ + if (ctxt->error != XPATH_EXPRESSION_OK) return + +/** + * CHECK_ERROR0: + * + * Macro to return 0 from the function if an XPath error was detected. + */ +#define CHECK_ERROR0 \ + if (ctxt->error != XPATH_EXPRESSION_OK) return(0) + +/** + * XP_ERROR: + * @X: the error code + * + * Macro to raise an XPath error and return. + */ +#define XP_ERROR(X) \ + { xmlXPatherror(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return; } + +/** + * XP_ERROR0: + * @X: the error code + * + * Macro to raise an XPath error and return 0. + */ +#define XP_ERROR0(X) \ + { xmlXPatherror(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return(0); } + +/** + * CHECK_TYPE: + * @typeval: the XPath type + * + * Macro to check that the value on top of the XPath stack is of a given + * type. + */ +#define CHECK_TYPE(typeval) \ + if ((ctxt->value == NULL) || (ctxt->value->type != typeval)) \ + XP_ERROR(XPATH_INVALID_TYPE) + +/** + * CHECK_TYPE0: + * @typeval: the XPath type + * + * Macro to check that the value on top of the XPath stack is of a given + * type. Return(0) in case of failure + */ +#define CHECK_TYPE0(typeval) \ + if ((ctxt->value == NULL) || (ctxt->value->type != typeval)) \ + XP_ERROR0(XPATH_INVALID_TYPE) + +/** + * CHECK_ARITY: + * @x: the number of expected args + * + * Macro to check that the number of args passed to an XPath function matches. + */ +#define CHECK_ARITY(x) \ + if (nargs != (x)) \ + XP_ERROR(XPATH_INVALID_ARITY); + +/** + * CAST_TO_STRING: + * + * Macro to try to cast the value on the top of the XPath stack to a string. 
+ */ +#define CAST_TO_STRING \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_STRING)) \ + xmlXPathStringFunction(ctxt, 1); + +/** + * CAST_TO_NUMBER: + * + * Macro to try to cast the value on the top of the XPath stack to a number. + */ +#define CAST_TO_NUMBER \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_NUMBER)) \ + xmlXPathNumberFunction(ctxt, 1); + +/** + * CAST_TO_BOOLEAN: + * + * Macro to try to cast the value on the top of the XPath stack to a boolean. + */ +#define CAST_TO_BOOLEAN \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_BOOLEAN)) \ + xmlXPathBooleanFunction(ctxt, 1); + +/* + * Variable Lookup forwarding. + */ +/** + * xmlXPathVariableLookupFunc: + * @ctxt: an XPath context + * @name: name of the variable + * @ns_uri: the namespace name hosting this variable + * + * Prototype for callbacks used to plug variable lookup in the XPath + * engine. + * + * Returns the XPath object value or NULL if not found. + */ +typedef xmlXPathObjectPtr (*xmlXPathVariableLookupFunc) (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +void xmlXPathRegisterVariableLookup (xmlXPathContextPtr ctxt, + xmlXPathVariableLookupFunc f, + void *data); + +/* + * Function Lookup forwarding. + */ +/** + * xmlXPathFuncLookupFunc: + * @ctxt: an XPath context + * @name: name of the function + * @ns_uri: the namespace name hosting this function + * + * Prototype for callbacks used to plug function lookup in the XPath + * engine. + * + * Returns the XPath function or NULL if not found. + */ +typedef xmlXPathFunction (*xmlXPathFuncLookupFunc) (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +void xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt, + xmlXPathFuncLookupFunc f, + void *funcCtxt); + +/* + * Error reporting. 
+ */ +void xmlXPatherror (xmlXPathParserContextPtr ctxt, + const char *file, + int line, + int no); + +void xmlXPathDebugDumpObject (FILE *output, + xmlXPathObjectPtr cur, + int depth); +void xmlXPathDebugDumpCompExpr(FILE *output, + xmlXPathCompExprPtr comp, + int depth); + +/** + * NodeSet handling. + */ +int xmlXPathNodeSetContains (xmlNodeSetPtr cur, + xmlNodePtr val); +xmlNodeSetPtr xmlXPathDifference (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); +xmlNodeSetPtr xmlXPathIntersection (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + +xmlNodeSetPtr xmlXPathDistinctSorted (xmlNodeSetPtr nodes); +xmlNodeSetPtr xmlXPathDistinct (xmlNodeSetPtr nodes); + +int xmlXPathHasSameNodes (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + +xmlNodeSetPtr xmlXPathNodeLeadingSorted (xmlNodeSetPtr nodes, + xmlNodePtr node); +xmlNodeSetPtr xmlXPathLeadingSorted (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); +xmlNodeSetPtr xmlXPathNodeLeading (xmlNodeSetPtr nodes, + xmlNodePtr node); +xmlNodeSetPtr xmlXPathLeading (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + +xmlNodeSetPtr xmlXPathNodeTrailingSorted (xmlNodeSetPtr nodes, + xmlNodePtr node); +xmlNodeSetPtr xmlXPathTrailingSorted (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); +xmlNodeSetPtr xmlXPathNodeTrailing (xmlNodeSetPtr nodes, + xmlNodePtr node); +xmlNodeSetPtr xmlXPathTrailing (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + + +/** + * Extending a context. 
+ */ + +int xmlXPathRegisterNs (xmlXPathContextPtr ctxt, + const xmlChar *prefix, + const xmlChar *ns_uri); +const xmlChar * xmlXPathNsLookup (xmlXPathContextPtr ctxt, + const xmlChar *prefix); +void xmlXPathRegisteredNsCleanup (xmlXPathContextPtr ctxt); + +int xmlXPathRegisterFunc (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathFunction f); +int xmlXPathRegisterFuncNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathFunction f); +int xmlXPathRegisterVariable (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathObjectPtr value); +int xmlXPathRegisterVariableNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathObjectPtr value); +xmlXPathFunction xmlXPathFunctionLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +xmlXPathFunction xmlXPathFunctionLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +void xmlXPathRegisteredFuncsCleanup(xmlXPathContextPtr ctxt); +xmlXPathObjectPtr xmlXPathVariableLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +xmlXPathObjectPtr xmlXPathVariableLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +void xmlXPathRegisteredVariablesCleanup(xmlXPathContextPtr ctxt); + +/** + * Utilities to extend XPath. + */ +xmlXPathParserContextPtr + xmlXPathNewParserContext (const xmlChar *str, + xmlXPathContextPtr ctxt); +void xmlXPathFreeParserContext (xmlXPathParserContextPtr ctxt); + +/* TODO: remap to xmlXPathValuePop and Push. 
*/ +xmlXPathObjectPtr valuePop (xmlXPathParserContextPtr ctxt); +int valuePush (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr value); + +xmlXPathObjectPtr xmlXPathNewString (const xmlChar *val); +xmlXPathObjectPtr xmlXPathNewCString (const char *val); +xmlXPathObjectPtr xmlXPathWrapString (xmlChar *val); +xmlXPathObjectPtr xmlXPathWrapCString (char * val); +xmlXPathObjectPtr xmlXPathNewFloat (double val); +xmlXPathObjectPtr xmlXPathNewBoolean (int val); +xmlXPathObjectPtr xmlXPathNewNodeSet (xmlNodePtr val); +xmlXPathObjectPtr xmlXPathNewValueTree (xmlNodePtr val); +void xmlXPathNodeSetAdd (xmlNodeSetPtr cur, + xmlNodePtr val); +void xmlXPathNodeSetAddUnique (xmlNodeSetPtr cur, + xmlNodePtr val); +void xmlXPathNodeSetAddNs (xmlNodeSetPtr cur, + xmlNodePtr node, + xmlNsPtr ns); +void xmlXPathNodeSetSort (xmlNodeSetPtr set); + +void xmlXPathRoot (xmlXPathParserContextPtr ctxt); +void xmlXPathEvalExpr (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathParseName (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathParseNCName (xmlXPathParserContextPtr ctxt); + +/* + * Existing functions. 
+ */ +double xmlXPathStringEvalNumber(const xmlChar *str); +int xmlXPathEvaluatePredicateResult(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr res); +void xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetMerge(xmlNodeSetPtr val1, xmlNodeSetPtr val2); +void xmlXPathNodeSetDel(xmlNodeSetPtr cur, xmlNodePtr val); +void xmlXPathNodeSetRemove(xmlNodeSetPtr cur, int val); +xmlXPathObjectPtr xmlXPathNewNodeSetList(xmlNodeSetPtr val); +xmlXPathObjectPtr xmlXPathWrapNodeSet(xmlNodeSetPtr val); +xmlXPathObjectPtr xmlXPathWrapExternal(void *val); + +int xmlXPathEqualValues(xmlXPathParserContextPtr ctxt); +int xmlXPathNotEqualValues(xmlXPathParserContextPtr ctxt); +int xmlXPathCompareValues(xmlXPathParserContextPtr ctxt, int inf, int strict); +void xmlXPathValueFlipSign(xmlXPathParserContextPtr ctxt); +void xmlXPathAddValues(xmlXPathParserContextPtr ctxt); +void xmlXPathSubValues(xmlXPathParserContextPtr ctxt); +void xmlXPathMultValues(xmlXPathParserContextPtr ctxt); +void xmlXPathDivValues(xmlXPathParserContextPtr ctxt); +void xmlXPathModValues(xmlXPathParserContextPtr ctxt); + +int xmlXPathIsNodeType(const xmlChar *name); + +/* + * Some of the axis navigation routines. 
+ */ +xmlNodePtr xmlXPathNextSelf(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextChild(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextDescendantOrSelf(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextParent(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextAncestorOrSelf(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextFollowingSibling(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextNamespace(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextAttribute(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +xmlNodePtr xmlXPathNextPrecedingSibling(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +/* + * The official core of XPath functions. 
+ */ +void xmlXPathLastFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathPositionFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathCountFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathLocalNameFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNamespaceURIFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStringLengthFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathContainsFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStartsWithFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringBeforeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathTranslateFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNotFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathTrueFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathFalseFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathLangFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSumFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathFloorFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathCeilingFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathRoundFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathBooleanFunction(xmlXPathParserContextPtr ctxt, int nargs); + +/** + * Really internal functions + */ +void xmlXPathNodeSetFreeNs(xmlNsPtr ns); + 
+#ifdef __cplusplus +} +#endif +#endif /* ! __XML_XPATH_INTERNALS_H__ */ diff --git a/bundle/libxml/include/libxml/xpointer.h b/bundle/libxml/include/libxml/xpointer.h new file mode 100644 index 0000000000..80c465c70f --- /dev/null +++ b/bundle/libxml/include/libxml/xpointer.h @@ -0,0 +1,83 @@ +/* + * xpointer.h : API to handle XML Pointers + * + * World Wide Web Consortium Working Draft 03-March-1998 + * http://www.w3.org/TR/1998/WD-xptr-19980303 + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_XPTR_H__ +#define __XML_XPTR_H__ + +#include <libxml/tree.h> +#include <libxml/xpath.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A Location Set + */ +typedef struct _xmlLocationSet xmlLocationSet; +typedef xmlLocationSet *xmlLocationSetPtr; +struct _xmlLocationSet { + int locNr; /* number of locations in the set */ + int locMax; /* size of the array as allocated */ + xmlXPathObjectPtr *locTab;/* array of locations */ +}; + +/* + * Handling of location sets. 
+ */ + +xmlLocationSetPtr xmlXPtrLocationSetCreate(xmlXPathObjectPtr val); +void xmlXPtrFreeLocationSet (xmlLocationSetPtr obj); +xmlLocationSetPtr xmlXPtrLocationSetMerge (xmlLocationSetPtr val1, + xmlLocationSetPtr val2); +xmlXPathObjectPtr xmlXPtrNewRange (xmlNodePtr start, + int startindex, + xmlNodePtr end, + int endindex); +xmlXPathObjectPtr xmlXPtrNewRangePoints (xmlXPathObjectPtr start, + xmlXPathObjectPtr end); +xmlXPathObjectPtr xmlXPtrNewRangeNodePoint(xmlNodePtr start, + xmlXPathObjectPtr end); +xmlXPathObjectPtr xmlXPtrNewRangePointNode(xmlXPathObjectPtr start, + xmlNodePtr end); +xmlXPathObjectPtr xmlXPtrNewRangeNodes (xmlNodePtr start, + xmlNodePtr end); +xmlXPathObjectPtr xmlXPtrNewLocationSetNodes(xmlNodePtr start, + xmlNodePtr end); +xmlXPathObjectPtr xmlXPtrNewLocationSetNodeSet(xmlNodeSetPtr set); +xmlXPathObjectPtr xmlXPtrNewRangeNodeObject(xmlNodePtr start, + xmlXPathObjectPtr end); +xmlXPathObjectPtr xmlXPtrNewCollapsedRange(xmlNodePtr start); +void xmlXPtrLocationSetAdd (xmlLocationSetPtr cur, + xmlXPathObjectPtr val); +xmlXPathObjectPtr xmlXPtrWrapLocationSet (xmlLocationSetPtr val); +void xmlXPtrLocationSetDel (xmlLocationSetPtr cur, + xmlXPathObjectPtr val); +void xmlXPtrLocationSetRemove(xmlLocationSetPtr cur, + int val); + +/* + * Functions. 
+ */ +xmlXPathContextPtr xmlXPtrNewContext (xmlDocPtr doc, + xmlNodePtr here, + xmlNodePtr origin); +xmlXPathObjectPtr xmlXPtrEval (const xmlChar *str, + xmlXPathContextPtr ctx); +void xmlXPtrRangeToFunction (xmlXPathParserContextPtr ctxt, + int nargs); +xmlNodePtr xmlXPtrBuildNodeList (xmlXPathObjectPtr obj); +void xmlXPtrEvalRangePredicate (xmlXPathParserContextPtr ctxt); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XPTR_H__ */ diff --git a/bundle/libxml/include/win32config.h b/bundle/libxml/include/win32config.h new file mode 100644 index 0000000000..5b004798cc --- /dev/null +++ b/bundle/libxml/include/win32config.h @@ -0,0 +1,111 @@ +#ifndef __LIBXML_WIN32_CONFIG__ +#define __LIBXML_WIN32_CONFIG__ + +#define HAVE_CTYPE_H +#define HAVE_STDARG_H +#define HAVE_MALLOC_H +#define HAVE_ERRNO_H + +#ifdef _WIN32_WCE +#undef HAVE_ERRNO_H +#include <windows.h> +#include "wincecompat.h" +#else +#define HAVE_SYS_STAT_H +#define HAVE__STAT +#define HAVE_STAT +#define HAVE_STDLIB_H +#define HAVE_TIME_H +#define HAVE_FCNTL_H +#include <io.h> +#include <direct.h> +#endif + +#include <libxml/xmlversion.h> + +#ifdef NEED_SOCKETS +#include <wsockcompat.h> +#endif + +#define HAVE_ISINF +#define HAVE_ISNAN +#include <math.h> +#ifdef _MSC_VER +/* MS C-runtime has functions which can be used in order to determine if + a given floating-point variable contains NaN, (+-)INF. These are + preferred, because floating-point technology is considered propriatary + by MS and we can assume that their functions know more about their + oddities than we do. */ +#include <float.h> +/* Bjorn Reese figured a quite nice construct for isinf() using the _fpclass + function. */ +#ifndef isinf +#define isinf(d) ((_fpclass(d) == _FPCLASS_PINF) ? 1 \ + : ((_fpclass(d) == _FPCLASS_NINF) ? -1 : 0)) +#endif +/* _isnan(x) returns nonzero if (x == NaN) and zero otherwise. 
*/ +#ifndef isnan +#define isnan(d) (_isnan(d)) +#endif +#else /* _MSC_VER */ +#ifndef isinf +static int isinf (double d) { + int expon = 0; + double val = frexp (d, &expon); + if (expon == 1025) { + if (val == 0.5) { + return 1; + } else if (val == -0.5) { + return -1; + } else { + return 0; + } + } else { + return 0; + } +} +#endif +#ifndef isnan +static int isnan (double d) { + int expon = 0; + double val = frexp (d, &expon); + if (expon == 1025) { + if (val == 0.5) { + return 0; + } else if (val == -0.5) { + return 0; + } else { + return 1; + } + } else { + return 0; + } +} +#endif +#endif /* _MSC_VER */ + +#if defined(_MSC_VER) || defined(__MINGW32__) +#define mkdir(p,m) _mkdir(p) +#define snprintf _snprintf +#define vsnprintf(b,c,f,a) _vsnprintf(b,c,f,a) +#endif + +/* Threading API to use should be specified here for compatibility reasons. + This is however best specified on the compiler's command-line. */ +#if defined(LIBXML_THREAD_ENABLED) +#if !defined(HAVE_PTHREAD_H) && !defined(HAVE_WIN32_THREADS) +#define HAVE_WIN32_THREADS +#endif +#endif + +/* Some third-party libraries far from our control assume the following + is defined, which it is not if we don't include windows.h. */ +#if !defined(FALSE) +#define FALSE 0 +#endif +#if !defined(TRUE) +#define TRUE (!(FALSE)) +#endif + +#endif /* __LIBXML_WIN32_CONFIG__ */ + diff --git a/bundle/libxml/include/wsockcompat.h b/bundle/libxml/include/wsockcompat.h new file mode 100644 index 0000000000..690048cede --- /dev/null +++ b/bundle/libxml/include/wsockcompat.h @@ -0,0 +1,61 @@ +/* include/wsockcompat.h + * Windows -> Berkeley Sockets compatibility things. 
+ */ + +#if !defined __XML_WSOCKCOMPAT_H__ +#define __XML_WSOCKCOMPAT_H__ + +#ifdef _WIN32_WCE +#include <winsock.h> +#else +#undef HAVE_ERRNO_H +#include <winsock2.h> +#endif + +#if !defined SOCKLEN_T +#define SOCKLEN_T int +#endif + +#define EWOULDBLOCK WSAEWOULDBLOCK +#define EINPROGRESS WSAEINPROGRESS +#define EALREADY WSAEALREADY +#define ENOTSOCK WSAENOTSOCK +#define EDESTADDRREQ WSAEDESTADDRREQ +#define EMSGSIZE WSAEMSGSIZE +#define EPROTOTYPE WSAEPROTOTYPE +#define ENOPROTOOPT WSAENOPROTOOPT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT +#define EOPNOTSUPP WSAEOPNOTSUPP +#define EPFNOSUPPORT WSAEPFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#define EADDRINUSE WSAEADDRINUSE +#define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#define ENETDOWN WSAENETDOWN +#define ENETUNREACH WSAENETUNREACH +#define ENETRESET WSAENETRESET +#define ECONNABORTED WSAECONNABORTED +#define ECONNRESET WSAECONNRESET +#define ENOBUFS WSAENOBUFS +#define EISCONN WSAEISCONN +#define ENOTCONN WSAENOTCONN +#define ESHUTDOWN WSAESHUTDOWN +#define ETOOMANYREFS WSAETOOMANYREFS +#define ETIMEDOUT WSAETIMEDOUT +#define ECONNREFUSED WSAECONNREFUSED +#define ELOOP WSAELOOP +#define EHOSTDOWN WSAEHOSTDOWN +#define EHOSTUNREACH WSAEHOSTUNREACH +#define EPROCLIM WSAEPROCLIM +#define EUSERS WSAEUSERS +#define EDQUOT WSAEDQUOT +#define ESTALE WSAESTALE +#define EREMOTE WSAEREMOTE +/* These cause conflicts with the codes from errno.h. Since they are + not used in the relevant code (nanoftp, nanohttp), we can leave + them disabled. 
+#define ENAMETOOLONG WSAENAMETOOLONG +#define ENOTEMPTY WSAENOTEMPTY +*/ + +#endif /* __XML_WSOCKCOMPAT_H__ */ diff --git a/bundle/libxml/libxml.h b/bundle/libxml/libxml.h new file mode 100644 index 0000000000..d34dd10444 --- /dev/null +++ b/bundle/libxml/libxml.h @@ -0,0 +1,28 @@ +/* + * libxml.h: internal header only used during the compilation of libxml + * + * See COPYRIGHT for the status of this software + * + * Author: breese@users.sourceforge.net + */ + +#ifndef __XML_LIBXML_H__ +#define __XML_LIBXML_H__ + +#include "php_config.h" +#include <libxml/xmlversion.h> + +#ifndef WITH_TRIO +#include <stdio.h> +#else +/** + * TRIO_REPLACE_STDIO: + * + * This macro is defined if teh trio string formatting functions are to + * be used instead of the default stdio ones. + */ +#define TRIO_REPLACE_STDIO +#include "trio.h" +#endif + +#endif /* ! __XML_LIBXML_H__ */ diff --git a/bundle/libxml/list.c b/bundle/libxml/list.c new file mode 100644 index 0000000000..18a82973e5 --- /dev/null +++ b/bundle/libxml/list.c @@ -0,0 +1,720 @@ +/* + * list.c: lists handling implementation + * + * Copyright (C) 2000 Gary Pennington and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: Gary.Pennington@uk.sun.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <stdlib.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/list.h> +#include <libxml/globals.h> + +/* + * Type definition are kept internal + */ + +struct _xmlLink +{ + struct _xmlLink *next; + struct _xmlLink *prev; + void *data; +}; + +struct _xmlList +{ + xmlLinkPtr sentinel; + void (*linkDeallocator)(xmlLinkPtr ); + int (*linkCompare)(const void *, const void*); +}; + +/************************************************************************ + * * + * Interfaces * + * * + ************************************************************************/ + +/** + * xmlLinkDeallocator: + * @l: a list + * @lk: a link + * + * Unlink and deallocate @lk from list @l + */ +static void +xmlLinkDeallocator(xmlListPtr l, xmlLinkPtr lk) +{ + (lk->prev)->next = lk->next; + (lk->next)->prev = lk->prev; + if(l->linkDeallocator) + l->linkDeallocator(lk); + xmlFree(lk); +} + +/** + * xmlLinkCompare: + * @data0: first data + * @data1: second data + * + * Compares two arbitrary data + * + * Returns -1, 0 or 1 depending on whether data1 is greater equal or smaller + * than data0 + */ +static int +xmlLinkCompare(const void *data0, const void *data1) +{ + if (data0 < data1) + return (-1); + else if (data0 == data1) + return (0); + return (1); +} + +/** + * xmlListLowerSearch: + * @l: a list + * @data: a data + * + * Search data in the ordered list walking from the beginning + * + * Returns the link containing the data or NULL + */ +static xmlLinkPtr +xmlListLowerSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + for(lk = l->sentinel->next;lk != l->sentinel && l->linkCompare(lk->data, data) <0 ;lk = lk->next); + return lk; +} + +/** + * xmlListHigherSearch: + * @l: a list + * @data: a data + * + * Search data in the ordered list walking backward from the end + * + * Returns the link containing the data or NULL + */ +static xmlLinkPtr 
+xmlListHigherSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + for(lk = l->sentinel->prev;lk != l->sentinel && l->linkCompare(lk->data, data) >0 ;lk = lk->prev); + return lk; +} + +/** + * xmlListSearch: + * @l: a list + * @data: a data + * + * Search data in the list + * + * Returns the link containing the data or NULL + */ +static xmlLinkPtr +xmlListLinkSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListLowerSearch(l, data); + if (lk == l->sentinel) + return NULL; + else { + if (l->linkCompare(lk->data, data) ==0) + return lk; + return NULL; + } +} + +/** + * xmlListLinkReverseSearch: + * @l: a list + * @data: a data + * + * Search data in the list processing backward + * + * Returns the link containing the data or NULL + */ +static xmlLinkPtr +xmlListLinkReverseSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListHigherSearch(l, data); + if (lk == l->sentinel) + return NULL; + else { + if (l->linkCompare(lk->data, data) ==0) + return lk; + return NULL; + } +} + +/** + * xmlListCreate: + * @deallocator: an optional deallocator function + * @compare: an optional comparison function + * + * Create a new list + * + * Returns the new list or NULL in case of error + */ +xmlListPtr +xmlListCreate(xmlListDeallocator deallocator, xmlListDataCompare compare) +{ + xmlListPtr l; + if (NULL == (l = (xmlListPtr )xmlMalloc( sizeof(xmlList)))) { + xmlGenericError(xmlGenericErrorContext, + "Cannot initialize memory for list"); + return (NULL); + } + /* Initialize the list to NULL */ + memset(l, 0, sizeof(xmlList)); + + /* Add the sentinel */ + if (NULL ==(l->sentinel = (xmlLinkPtr )xmlMalloc(sizeof(xmlLink)))) { + xmlGenericError(xmlGenericErrorContext, + "Cannot initialize memory for sentinel"); + xmlFree(l); + return (NULL); + } + l->sentinel->next = l->sentinel; + l->sentinel->prev = l->sentinel; + l->sentinel->data = NULL; + + /* If there is a link deallocator, use it */ + if (deallocator != NULL) + l->linkDeallocator = deallocator; + /* 
If there is a link comparator, use it */ + if (compare != NULL) + l->linkCompare = compare; + else /* Use our own */ + l->linkCompare = xmlLinkCompare; + return l; +} + +/** + * xmlListSearch: + * @l: a list + * @data: a search value + * + * Search the list for an existing value of @data + * + * Returns the value associated to @data or NULL in case of error + */ +void * +xmlListSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListLinkSearch(l, data); + if (lk) + return (lk->data); + return NULL; +} + +/** + * xmlListReverseSearch: + * @l: a list + * @data: a search value + * + * Search the list in reverse order for an existing value of @data + * + * Returns the value associated to @data or NULL in case of error + */ +void * +xmlListReverseSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListLinkReverseSearch(l, data); + if (lk) + return (lk->data); + return NULL; +} + +/** + * xmlListInsert: + * @l: a list + * @data: the data + * + * Insert data in the ordered list at the beginning for this value + * + * Returns 0 in case of success, 1 in case of failure + */ +int +xmlListInsert(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = xmlListLowerSearch(l, data); + /* Add the new link */ + lkNew = (xmlLinkPtr) xmlMalloc(sizeof(xmlLink)); + if (lkNew == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot initialize memory for new link"); + return (1); + } + lkNew->data = data; + lkPlace = lkPlace->prev; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 0; +} + +/** + * xmlListAppend: + * @l: a list + * @data: the data + * + * Insert data in the ordered list at the end for this value + * + * Returns 0 in case of success, 1 in case of failure + */ +int xmlListAppend(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = xmlListHigherSearch(l, data); + /* Add the new link */ + lkNew = (xmlLinkPtr) xmlMalloc(sizeof(xmlLink)); + if 
(lkNew == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot initialize memory for new link"); + return (0); + } + lkNew->data = data; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 1; +} + +/** + * xmlListDelete: + * @l: a list + * + * Deletes the list and its associated data + */ +void xmlListDelete(xmlListPtr l) +{ + xmlListClear(l); + xmlFree(l->sentinel); + xmlFree(l); +} + +/** + * xmlListRemoveFirst: + * @l: a list + * @data: list data + * + * Remove the first instance associated to data in the list + * + * Returns 1 if a deallocation occured, or 0 if not found + */ +int +xmlListRemoveFirst(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + /*Find the first instance of this data */ + lk = xmlListLinkSearch(l, data); + if (lk != NULL) { + xmlLinkDeallocator(l, lk); + return 1; + } + return 0; +} + +/** + * xmlListRemoveLast: + * @l: a list + * @data: list data + * + * Remove the last instance associated to data in the list + * + * Returns 1 if a deallocation occured, or 0 if not found + */ +int +xmlListRemoveLast(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + /*Find the last instance of this data */ + lk = xmlListLinkReverseSearch(l, data); + if (lk != NULL) { + xmlLinkDeallocator(l, lk); + return 1; + } + return 0; +} + +/** + * xmlListRemoveAll: + * @l: a list + * @data: list data + * + * Remove the all instance associated to data in the list + * + * Returns the number of deallocation, or 0 if not found + */ +int +xmlListRemoveAll(xmlListPtr l, void *data) +{ + int count=0; + + + while(xmlListRemoveFirst(l, data)) + count++; + return count; +} + +/** + * xmlListClear: + * @l: a list + * + * Remove the all data in the list + */ +void +xmlListClear(xmlListPtr l) +{ + xmlLinkPtr lk = l->sentinel->next; + + while(lk != l->sentinel) { + xmlLinkPtr next = lk->next; + + xmlLinkDeallocator(l, lk); + lk = next; + } +} + +/** + * xmlListEmpty: + * @l: a list + * + * Is the list 
empty ? + * + * Returns 1 if the list is empty, 0 otherwise + */ +int +xmlListEmpty(xmlListPtr l) +{ + return (l->sentinel->next == l->sentinel); +} + +/** + * xmlListFront: + * @l: a list + * + * Get the first element in the list + * + * Returns the first element in the list, or NULL + */ +xmlLinkPtr +xmlListFront(xmlListPtr l) +{ + return (l->sentinel->next); +} + +/** + * xmlListEnd: + * @l: a list + * + * Get the last element in the list + * + * Returns the last element in the list, or NULL + */ +xmlLinkPtr +xmlListEnd(xmlListPtr l) +{ + return (l->sentinel->prev); +} + +/** + * xmlListSize: + * @l: a list + * + * Get the number of elements in the list + * + * Returns the number of elements in the list + */ +int +xmlListSize(xmlListPtr l) +{ + xmlLinkPtr lk; + int count=0; + + /* TODO: keep a counter in xmlList instead */ + for(lk = l->sentinel->next; lk != l->sentinel; lk = lk->next, count++); + return count; +} + +/** + * xmlListPopFront: + * @l: a list + * + * Removes the first element in the list + */ +void +xmlListPopFront(xmlListPtr l) +{ + if(!xmlListEmpty(l)) + xmlLinkDeallocator(l, l->sentinel->next); +} + +/** + * xmlListPopBack: + * @l: a list + * + * Removes the last element in the list + */ +void +xmlListPopBack(xmlListPtr l) +{ + if(!xmlListEmpty(l)) + xmlLinkDeallocator(l, l->sentinel->prev); +} + +/** + * xmlListPushFront: + * @l: a list + * @data: new data + * + * add the new data at the beginning of the list + * + * Returns 1 if successful, 0 otherwise + */ +int +xmlListPushFront(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = l->sentinel; + /* Add the new link */ + lkNew = (xmlLinkPtr) xmlMalloc(sizeof(xmlLink)); + if (lkNew == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot initialize memory for new link"); + return (0); + } + lkNew->data = data; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 1; +} + +/** + * xmlListPushBack: + * 
@l: a list + * @data: new data + * + * add the new data at the end of the list + * + * Returns 1 if successful, 0 otherwise + */ +int +xmlListPushBack(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = l->sentinel->prev; + /* Add the new link */ + if (NULL ==(lkNew = (xmlLinkPtr )xmlMalloc(sizeof(xmlLink)))) { + xmlGenericError(xmlGenericErrorContext, + "Cannot initialize memory for new link"); + return (0); + } + lkNew->data = data; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 1; +} + +/** + * xmlLinkGetData: + * @lk: a link + * + * See Returns. + * + * Returns a pointer to the data referenced from this link + */ +void * +xmlLinkGetData(xmlLinkPtr lk) +{ + return lk->data; +} + +/** + * xmlListReverse: + * @l: a list + * + * Reverse the order of the elements in the list + */ +void +xmlListReverse(xmlListPtr l) { + xmlLinkPtr lk; + xmlLinkPtr lkPrev = l->sentinel; + + for(lk = l->sentinel->next; lk != l->sentinel; lk = lk->next) { + lkPrev->next = lkPrev->prev; + lkPrev->prev = lk; + lkPrev = lk; + } + /* Fix up the last node */ + lkPrev->next = lkPrev->prev; + lkPrev->prev = lk; +} + +/** + * xmlListSort: + * @l: a list + * + * Sort all the elements in the list + */ +void +xmlListSort(xmlListPtr l) +{ + xmlListPtr lTemp; + + if(xmlListEmpty(l)) + return; + + /* I think that the real answer is to implement quicksort, the + * alternative is to implement some list copying procedure which + * would be based on a list copy followed by a clear followed by + * an insert. This is slow... 
+ */ + + if (NULL ==(lTemp = xmlListDup(l))) + return; + xmlListClear(l); + xmlListMerge(l, lTemp); + xmlListDelete(lTemp); + return; +} + +/** + * xmlListWalk: + * @l: a list + * @walker: a processing function + * @user: a user parameter passed to the walker function + * + * Walk all the element of the first from first to last and + * apply the walker function to it + */ +void +xmlListWalk(xmlListPtr l, xmlListWalker walker, const void *user) { + xmlLinkPtr lk; + + for(lk = l->sentinel->next; lk != l->sentinel; lk = lk->next) { + if((walker(lk->data, user)) == 0) + break; + } +} + +/** + * xmlListReverseWalk: + * @l: a list + * @walker: a processing function + * @user: a user parameter passed to the walker function + * + * Walk all the element of the list in reverse order and + * apply the walker function to it + */ +void +xmlListReverseWalk(xmlListPtr l, xmlListWalker walker, const void *user) { + xmlLinkPtr lk; + + for(lk = l->sentinel->prev; lk != l->sentinel; lk = lk->prev) { + if((walker(lk->data, user)) == 0) + break; + } +} + +/** + * xmlListMerge: + * @l1: the original list + * @l2: the new list + * + * include all the elements of the second list in the first one and + * clear the second list + */ +void +xmlListMerge(xmlListPtr l1, xmlListPtr l2) +{ + xmlListCopy(l1, l2); + xmlListClear(l2); +} + +/** + * xmlListDup: + * @old: the list + * + * Duplicate the list + * + * Returns a new copy of the list or NULL in case of error + */ +xmlListPtr +xmlListDup(const xmlListPtr old) +{ + xmlListPtr cur; + /* Hmmm, how to best deal with allocation issues when copying + * lists. If there is a de-allocator, should responsibility lie with + * the new list or the old list. Surely not both. 
I'll arbitrarily + * set it to be the old list for the time being whilst I work out + * the answer + */ + if (NULL ==(cur = xmlListCreate(NULL, old->linkCompare))) + return (NULL); + if (0 != xmlListCopy(cur, old)) + return NULL; + return cur; +} + +/** + * xmlListCopy: + * @cur: the new list + * @old: the old list + * + * Move all the element from the old list in the new list + * + * Returns 0 in case of success 1 in case of error + */ +int +xmlListCopy(xmlListPtr cur, const xmlListPtr old) +{ + /* Walk the old tree and insert the data into the new one */ + xmlLinkPtr lk; + + for(lk = old->sentinel->next; lk != old->sentinel; lk = lk->next) { + if (0 !=xmlListInsert(cur, lk->data)) { + xmlListDelete(cur); + return (1); + } + } + return (0); +} +/* xmlListUnique() */ +/* xmlListSwap */ diff --git a/bundle/libxml/nanoftp.c b/bundle/libxml/nanoftp.c new file mode 100644 index 0000000000..4f3b819238 --- /dev/null +++ b/bundle/libxml/nanoftp.c @@ -0,0 +1,1924 @@ +/* + * nanoftp.c: basic FTP client support + * + * Reference: RFC 959 + */ + +#ifdef TESTING +#define STANDALONE +#define HAVE_STDLIB_H +#define HAVE_UNISTD_H +#define HAVE_SYS_SOCKET_H +#define HAVE_NETINET_IN_H +#define HAVE_NETDB_H +#define HAVE_SYS_TIME_H +#else /* TESTING */ +#define NEED_SOCKETS +#endif /* TESTING */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_FTP_ENABLED +#include <string.h> + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif +#ifdef HAVE_NETINET_IN_H +#include <netinet/in.h> +#endif +#ifdef HAVE_ARPA_INET_H +#include <arpa/inet.h> +#endif +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#include 
<libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libxml/xmlerror.h> +#include <libxml/nanoftp.h> +#include <libxml/globals.h> + +/* #define DEBUG_FTP 1 */ +#ifdef STANDALONE +#ifndef DEBUG_FTP +#define DEBUG_FTP 1 +#endif +#endif + +/** + * A couple portability macros + */ +#ifndef _WINSOCKAPI_ +#define closesocket(s) close(s) +#define SOCKET int +#endif +#if defined(VMS) || defined(__VMS) +#define SOCKLEN_T unsigned int +#endif + +/* + * Hack to make compilation work -SH + */ +#ifndef SOCKLEN_T +#define SOCKLEN_T unsigned int +#endif + +#define FTP_COMMAND_OK 200 +#define FTP_SYNTAX_ERROR 500 +#define FTP_GET_PASSWD 331 +#define FTP_BUF_SIZE 512 + +typedef struct xmlNanoFTPCtxt { + char *protocol; /* the protocol name */ + char *hostname; /* the host name */ + int port; /* the port */ + char *path; /* the path within the URL */ + char *user; /* user string */ + char *passwd; /* passwd string */ + struct sockaddr_in ftpAddr; /* the socket address struct */ + int passive; /* currently we support only passive !!! */ + SOCKET controlFd; /* the file descriptor for the control socket */ + SOCKET dataFd; /* the file descriptor for the data socket */ + int state; /* WRITE / READ / CLOSED */ + int returnValue; /* the protocol return value */ + /* buffer for data received from the control connection */ + char controlBuf[FTP_BUF_SIZE + 1]; + int controlBufIndex; + int controlBufUsed; + int controlBufAnswer; +} xmlNanoFTPCtxt, *xmlNanoFTPCtxtPtr; + +static int initialized = 0; +static char *proxy = NULL; /* the proxy name if any */ +static int proxyPort = 0; /* the proxy port if any */ +static char *proxyUser = NULL; /* user for proxy authentication */ +static char *proxyPasswd = NULL;/* passwd for proxy authentication */ +static int proxyType = 0; /* uses TYPE or a@b ? */ + +/** + * xmlNanoFTPInit: + * + * Initialize the FTP protocol layer. 
+ * Currently it just checks for proxy informations, + * and get the hostname + */ + +void +xmlNanoFTPInit(void) { + const char *env; +#ifdef _WINSOCKAPI_ + WSADATA wsaData; +#endif + + if (initialized) + return; + +#ifdef _WINSOCKAPI_ + if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0) + return; +#endif + + proxyPort = 21; + env = getenv("no_proxy"); + if (env != NULL) + return; + env = getenv("ftp_proxy"); + if (env != NULL) { + xmlNanoFTPScanProxy(env); + } else { + env = getenv("FTP_PROXY"); + if (env != NULL) { + xmlNanoFTPScanProxy(env); + } + } + env = getenv("ftp_proxy_user"); + if (env != NULL) { + proxyUser = xmlMemStrdup(env); + } + env = getenv("ftp_proxy_password"); + if (env != NULL) { + proxyPasswd = xmlMemStrdup(env); + } + initialized = 1; +} + +/** + * xmlNanoFTPCleanup: + * + * Cleanup the FTP protocol layer. This cleanup proxy informations. + */ + +void +xmlNanoFTPCleanup(void) { + if (proxy != NULL) { + xmlFree(proxy); + proxy = NULL; + } + if (proxyUser != NULL) { + xmlFree(proxyUser); + proxyUser = NULL; + } + if (proxyPasswd != NULL) { + xmlFree(proxyPasswd); + proxyPasswd = NULL; + } +#ifdef _WINSOCKAPI_ + if (initialized) + WSACleanup(); +#endif + initialized = 0; +} + +/** + * xmlNanoFTPProxy: + * @host: the proxy host name + * @port: the proxy port + * @user: the proxy user name + * @passwd: the proxy password + * @type: the type of proxy 1 for using SITE, 2 for USER a@b + * + * Setup the FTP proxy informations. + * This can also be done by using ftp_proxy ftp_proxy_user and + * ftp_proxy_password environment variables. 
+ */ + +void +xmlNanoFTPProxy(const char *host, int port, const char *user, + const char *passwd, int type) { + if (proxy != NULL) + xmlFree(proxy); + if (proxyUser != NULL) + xmlFree(proxyUser); + if (proxyPasswd != NULL) + xmlFree(proxyPasswd); + if (host) + proxy = xmlMemStrdup(host); + if (user) + proxyUser = xmlMemStrdup(user); + if (passwd) + proxyPasswd = xmlMemStrdup(passwd); + proxyPort = port; + proxyType = type; +} + +/** + * xmlNanoFTPScanURL: + * @ctx: an FTP context + * @URL: The URL used to initialize the context + * + * (Re)Initialize an FTP context by parsing the URL and finding + * the protocol host port and path it indicates. + */ + +static void +xmlNanoFTPScanURL(void *ctx, const char *URL) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + const char *cur = URL; + char buf[4096]; + int indx = 0; + int port = 0; + + if (ctxt->protocol != NULL) { + xmlFree(ctxt->protocol); + ctxt->protocol = NULL; + } + if (ctxt->hostname != NULL) { + xmlFree(ctxt->hostname); + ctxt->hostname = NULL; + } + if (ctxt->path != NULL) { + xmlFree(ctxt->path); + ctxt->path = NULL; + } + if (URL == NULL) return; + buf[indx] = 0; + while (*cur != 0) { + if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) { + buf[indx] = 0; + ctxt->protocol = xmlMemStrdup(buf); + indx = 0; + cur += 3; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) return; + + buf[indx] = 0; + /* allow user@ and user:pass@ forms */ + { + const char *p = strchr(cur, '@'); + if(p) { + while(1) { + if(cur[0] == ':' || cur[0] == '@') break; + buf[indx++] = *cur++; + } + buf[indx] = 0; + ctxt->user = xmlMemStrdup(buf); + indx = 0; + if(cur[0] == ':') { + cur++; + while(1) { + if(cur[0] == '@') break; + buf[indx++] = *cur++; + } + buf[indx] = 0; + ctxt->passwd = xmlMemStrdup(buf); + indx = 0; + } + cur = p+1; + } + } + + while (1) { + if (cur[0] == ':') { + buf[indx] = 0; + ctxt->hostname = xmlMemStrdup(buf); + indx = 0; + cur += 1; + while ((*cur >= '0') && (*cur <= '9')) { + port *= 10; 
+ port += *cur - '0'; + cur++; + } + if (port != 0) ctxt->port = port; + while ((cur[0] != '/') && (*cur != 0)) + cur++; + break; + } + if ((*cur == '/') || (*cur == 0)) { + buf[indx] = 0; + ctxt->hostname = xmlMemStrdup(buf); + indx = 0; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) + ctxt->path = xmlMemStrdup("/"); + else { + indx = 0; + buf[indx] = 0; + while (*cur != 0) + buf[indx++] = *cur++; + buf[indx] = 0; + ctxt->path = xmlMemStrdup(buf); + } +} + +/** + * xmlNanoFTPUpdateURL: + * @ctx: an FTP context + * @URL: The URL used to update the context + * + * Update an FTP context by parsing the URL and finding + * new path it indicates. If there is an error in the + * protocol, hostname, port or other information, the + * error is raised. It indicates a new connection has to + * be established. + * + * Returns 0 if Ok, -1 in case of error (other host). + */ + +int +xmlNanoFTPUpdateURL(void *ctx, const char *URL) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + const char *cur = URL; + char buf[4096]; + int indx = 0; + int port = 0; + + if (URL == NULL) + return(-1); + if (ctxt == NULL) + return(-1); + if (ctxt->protocol == NULL) + return(-1); + if (ctxt->hostname == NULL) + return(-1); + buf[indx] = 0; + while (*cur != 0) { + if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) { + buf[indx] = 0; + if (strcmp(ctxt->protocol, buf)) + return(-1); + indx = 0; + cur += 3; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) + return(-1); + + buf[indx] = 0; + while (1) { + if (cur[0] == ':') { + buf[indx] = 0; + if (strcmp(ctxt->hostname, buf)) + return(-1); + indx = 0; + cur += 1; + while ((*cur >= '0') && (*cur <= '9')) { + port *= 10; + port += *cur - '0'; + cur++; + } + if (port != ctxt->port) + return(-1); + while ((cur[0] != '/') && (*cur != 0)) + cur++; + break; + } + if ((*cur == '/') || (*cur == 0)) { + buf[indx] = 0; + if (strcmp(ctxt->hostname, buf)) + return(-1); + indx = 0; + break; + } + buf[indx++] = *cur++; + } + if 
(ctxt->path != NULL) { + xmlFree(ctxt->path); + ctxt->path = NULL; + } + + if (*cur == 0) + ctxt->path = xmlMemStrdup("/"); + else { + indx = 0; + buf[indx] = 0; + while (*cur != 0) + buf[indx++] = *cur++; + buf[indx] = 0; + ctxt->path = xmlMemStrdup(buf); + } + return(0); +} + +/** + * xmlNanoFTPScanProxy: + * @URL: The proxy URL used to initialize the proxy context + * + * (Re)Initialize the FTP Proxy context by parsing the URL and finding + * the protocol host port it indicates. + * Should be like ftp://myproxy/ or ftp://myproxy:3128/ + * A NULL URL cleans up proxy informations. + */ + +void +xmlNanoFTPScanProxy(const char *URL) { + const char *cur = URL; + char buf[4096]; + int indx = 0; + int port = 0; + + if (proxy != NULL) { + xmlFree(proxy); + proxy = NULL; + } + if (proxyPort != 0) { + proxyPort = 0; + } +#ifdef DEBUG_FTP + if (URL == NULL) + xmlGenericError(xmlGenericErrorContext, "Removing FTP proxy info\n"); + else + xmlGenericError(xmlGenericErrorContext, "Using FTP proxy %s\n", URL); +#endif + if (URL == NULL) return; + buf[indx] = 0; + while (*cur != 0) { + if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) { + buf[indx] = 0; + indx = 0; + cur += 3; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) return; + + buf[indx] = 0; + while (1) { + if (cur[0] == ':') { + buf[indx] = 0; + proxy = xmlMemStrdup(buf); + indx = 0; + cur += 1; + while ((*cur >= '0') && (*cur <= '9')) { + port *= 10; + port += *cur - '0'; + cur++; + } + if (port != 0) proxyPort = port; + while ((cur[0] != '/') && (*cur != 0)) + cur++; + break; + } + if ((*cur == '/') || (*cur == 0)) { + buf[indx] = 0; + proxy = xmlMemStrdup(buf); + indx = 0; + break; + } + buf[indx++] = *cur++; + } +} + +/** + * xmlNanoFTPNewCtxt: + * @URL: The URL used to initialize the context + * + * Allocate and initialize a new FTP context. + * + * Returns an FTP context or NULL in case of error. 
+ */ + +void* +xmlNanoFTPNewCtxt(const char *URL) { + xmlNanoFTPCtxtPtr ret; + + ret = (xmlNanoFTPCtxtPtr) xmlMalloc(sizeof(xmlNanoFTPCtxt)); + if (ret == NULL) return(NULL); + + memset(ret, 0, sizeof(xmlNanoFTPCtxt)); + ret->port = 21; + ret->passive = 1; + ret->returnValue = 0; + ret->controlBufIndex = 0; + ret->controlBufUsed = 0; + ret->controlFd = -1; + + if (URL != NULL) + xmlNanoFTPScanURL(ret, URL); + + return(ret); +} + +/** + * xmlNanoFTPFreeCtxt: + * @ctx: an FTP context + * + * Frees the context after closing the connection. + */ + +void +xmlNanoFTPFreeCtxt(void * ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + if (ctxt == NULL) return; + if (ctxt->hostname != NULL) xmlFree(ctxt->hostname); + if (ctxt->protocol != NULL) xmlFree(ctxt->protocol); + if (ctxt->path != NULL) xmlFree(ctxt->path); + ctxt->passive = 1; + if (ctxt->controlFd >= 0) closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + ctxt->controlBufIndex = -1; + ctxt->controlBufUsed = -1; + xmlFree(ctxt); +} + +/** + * xmlNanoFTPParseResponse: + * @buf: the buffer containing the response + * @len: the buffer length + * + * Parsing of the server answer, we just extract the code. 
+ * + * returns 0 for errors + * +XXX for last line of response + * -XXX for response to be continued + */ +static int +xmlNanoFTPParseResponse(char *buf, int len) { + int val = 0; + + if (len < 3) return(-1); + if ((*buf >= '0') && (*buf <= '9')) + val = val * 10 + (*buf - '0'); + else + return(0); + buf++; + if ((*buf >= '0') && (*buf <= '9')) + val = val * 10 + (*buf - '0'); + else + return(0); + buf++; + if ((*buf >= '0') && (*buf <= '9')) + val = val * 10 + (*buf - '0'); + else + return(0); + buf++; + if (*buf == '-') + return(-val); + return(val); +} + +/** + * xmlNanoFTPGetMore: + * @ctx: an FTP context + * + * Read more information from the FTP control connection + * Returns the number of bytes read, < 0 indicates an error + */ +static int +xmlNanoFTPGetMore(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + int len; + int size; + + if ((ctxt->controlBufIndex < 0) || (ctxt->controlBufIndex > FTP_BUF_SIZE)) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : controlBufIndex = %d\n", + ctxt->controlBufIndex); +#endif + return(-1); + } + + if ((ctxt->controlBufUsed < 0) || (ctxt->controlBufUsed > FTP_BUF_SIZE)) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : controlBufUsed = %d\n", + ctxt->controlBufUsed); +#endif + return(-1); + } + if (ctxt->controlBufIndex > ctxt->controlBufUsed) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : controlBufIndex > controlBufUsed %d > %d\n", + ctxt->controlBufIndex, ctxt->controlBufUsed); +#endif + return(-1); + } + + /* + * First pack the control buffer + */ + if (ctxt->controlBufIndex > 0) { + memmove(&ctxt->controlBuf[0], &ctxt->controlBuf[ctxt->controlBufIndex], + ctxt->controlBufUsed - ctxt->controlBufIndex); + ctxt->controlBufUsed -= ctxt->controlBufIndex; + ctxt->controlBufIndex = 0; + } + size = FTP_BUF_SIZE - ctxt->controlBufUsed; + if (size == 0) { +#ifdef DEBUG_FTP + 
xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : buffer full %d \n", ctxt->controlBufUsed); +#endif + return(0); + } + + /* + * Read the amount left on the control connection + */ + if ((len = recv(ctxt->controlFd, &ctxt->controlBuf[ctxt->controlBufIndex], + size, 0)) < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : read %d [%d - %d]\n", len, + ctxt->controlBufUsed, ctxt->controlBufUsed + len); +#endif + ctxt->controlBufUsed += len; + ctxt->controlBuf[ctxt->controlBufUsed] = 0; + + return(len); +} + +/** + * xmlNanoFTPReadResponse: + * @ctx: an FTP context + * + * Read the response from the FTP server after a command. + * Returns the code number + */ +static int +xmlNanoFTPReadResponse(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char *ptr, *end; + int len; + int res = -1, cur = -1; + +get_more: + /* + * Assumes everything up to controlBuf[controlBufIndex] has been read + * and analyzed. + */ + len = xmlNanoFTPGetMore(ctx); + if (len < 0) { + return(-1); + } + if ((ctxt->controlBufUsed == 0) && (len == 0)) { + return(-1); + } + ptr = &ctxt->controlBuf[ctxt->controlBufIndex]; + end = &ctxt->controlBuf[ctxt->controlBufUsed]; + +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "\n<<<\n%s\n--\n", ptr); +#endif + while (ptr < end) { + cur = xmlNanoFTPParseResponse(ptr, end - ptr); + if (cur > 0) { + /* + * Successfully scanned the control code, scratch + * till the end of the line, but keep the index to be + * able to analyze the result if needed. 
+ */ + res = cur; + ptr += 3; + ctxt->controlBufAnswer = ptr - ctxt->controlBuf; + while ((ptr < end) && (*ptr != '\n')) ptr++; + if (*ptr == '\n') ptr++; + if (*ptr == '\r') ptr++; + break; + } + while ((ptr < end) && (*ptr != '\n')) ptr++; + if (ptr >= end) { + ctxt->controlBufIndex = ctxt->controlBufUsed; + goto get_more; + } + if (*ptr != '\r') ptr++; + } + + if (res < 0) goto get_more; + ctxt->controlBufIndex = ptr - ctxt->controlBuf; +#ifdef DEBUG_FTP + ptr = &ctxt->controlBuf[ctxt->controlBufIndex]; + xmlGenericError(xmlGenericErrorContext, "\n---\n%s\n--\n", ptr); +#endif + +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "Got %d\n", res); +#endif + return(res / 100); +} + +/** + * xmlNanoFTPGetResponse: + * @ctx: an FTP context + * + * Get the response from the FTP server after a command. + * Returns the code number + */ + +int +xmlNanoFTPGetResponse(void *ctx) { + int res; + + res = xmlNanoFTPReadResponse(ctx); + + return(res); +} + +/** + * xmlNanoFTPCheckResponse: + * @ctx: an FTP context + * + * Check if there is a response from the FTP server after a command. 
+ * Returns the code number, or 0 + */ + +int +xmlNanoFTPCheckResponse(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + fd_set rfd; + struct timeval tv; + + tv.tv_sec = 0; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->controlFd, &rfd); + switch(select(ctxt->controlFd + 1, &rfd, NULL, NULL, &tv)) { + case 0: + return(0); + case -1: +#ifdef DEBUG_FTP + perror("select"); +#endif + return(-1); + + } + + return(xmlNanoFTPReadResponse(ctx)); +} + +/** + * Send the user authentication + */ + +static int +xmlNanoFTPSendUser(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200]; + int len; + int res; + + if (ctxt->user == NULL) + snprintf(buf, sizeof(buf), "USER anonymous\r\n"); + else + snprintf(buf, sizeof(buf), "USER %s\r\n", ctxt->user); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) return(res); + return(0); +} + +/** + * Send the password authentication + */ + +static int +xmlNanoFTPSendPasswd(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200]; + int len; + int res; + + if (ctxt->passwd == NULL) + snprintf(buf, sizeof(buf), "PASS anonymous@\r\n"); + else + snprintf(buf, sizeof(buf), "PASS %s\r\n", ctxt->passwd); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) return(res); + return(0); +} + +/** + * xmlNanoFTPQuit: + * @ctx: an FTP context + * + * Send a QUIT command to the server + * + * Returns -1 in case of error, 0 otherwise + */ + + +int +xmlNanoFTPQuit(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200]; + int len; + int res; + + snprintf(buf, sizeof(buf), "QUIT\r\n"); + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); /* Just to be 
consistent, even though we know it can't have a % in it */ +#endif + res = send(ctxt->controlFd, buf, len, 0); + return(0); +} + +/** + * xmlNanoFTPConnect: + * @ctx: an FTP context + * + * Tries to open a control connection + * + * Returns -1 in case of error, 0 otherwise + */ + +int +xmlNanoFTPConnect(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + struct hostent *hp; + int port; + int res; + + if (ctxt == NULL) + return(-1); + if (ctxt->hostname == NULL) + return(-1); + + /* + * do the blocking DNS query. + */ + if (proxy) + hp = gethostbyname(proxy); + else + hp = gethostbyname(ctxt->hostname); + if (hp == NULL) + return(-1); + + /* + * Prepare the socket + */ + memset(&ctxt->ftpAddr, 0, sizeof(ctxt->ftpAddr)); + ctxt->ftpAddr.sin_family = AF_INET; + memcpy(&ctxt->ftpAddr.sin_addr, hp->h_addr_list[0], hp->h_length); + if (proxy) { + port = proxyPort; + } else { + port = ctxt->port; + } + if (port == 0) + port = 21; + ctxt->ftpAddr.sin_port = htons(port); + ctxt->controlFd = socket(AF_INET, SOCK_STREAM, 0); + if (ctxt->controlFd < 0) + return(-1); + + /* + * Do the connect. + */ + if (connect(ctxt->controlFd, (struct sockaddr *) &ctxt->ftpAddr, + sizeof(struct sockaddr_in)) < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + + /* + * Wait for the HELLO from the server. 
+ */ + res = xmlNanoFTPGetResponse(ctxt); + if (res != 2) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + + /* + * State diagram for the login operation on the FTP server + * + * Reference: RFC 959 + * + * 1 + * +---+ USER +---+------------->+---+ + * | B |---------->| W | 2 ---->| E | + * +---+ +---+------ | -->+---+ + * | | | | | + * 3 | | 4,5 | | | + * -------------- ----- | | | + * | | | | | + * | | | | | + * | --------- | + * | 1| | | | + * V | | | | + * +---+ PASS +---+ 2 | ------>+---+ + * | |---------->| W |------------->| S | + * +---+ +---+ ---------->+---+ + * | | | | | + * 3 | |4,5| | | + * -------------- -------- | + * | | | | | + * | | | | | + * | ----------- + * | 1,3| | | | + * V | 2| | | + * +---+ ACCT +---+-- | ----->+---+ + * | |---------->| W | 4,5 -------->| F | + * +---+ +---+------------->+---+ + * + * Of course in case of using a proxy this get really nasty and is not + * standardized at all :-( + */ + if (proxy) { + int len; + char buf[400]; + + if (proxyUser != NULL) { + /* + * We need proxy auth + */ + snprintf(buf, sizeof(buf), "USER %s\r\n", proxyUser); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + switch (res) { + case 2: + if (proxyPasswd == NULL) + break; + case 3: + if (proxyPasswd != NULL) + snprintf(buf, sizeof(buf), "PASS %s\r\n", proxyPasswd); + else + snprintf(buf, sizeof(buf), "PASS anonymous@\r\n"); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if (res > 3) { + 
closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(-1); + } + break; + case 1: + break; + case 4: + case 5: + case -1: + default: + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(-1); + } + } + + /* + * We assume we don't need more authentication to the proxy + * and that it succeeded :-\ + */ + switch (proxyType) { + case 0: + /* we will try in sequence */ + case 1: + /* Using SITE command */ + snprintf(buf, sizeof(buf), "SITE %s\r\n", ctxt->hostname); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if (res == 2) { + /* we assume it worked :-\ 1 is error for SITE command */ + proxyType = 1; + break; + } + if (proxyType == 1) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + case 2: + /* USER user@host command */ + if (ctxt->user == NULL) + snprintf(buf, sizeof(buf), "USER anonymous@%s\r\n", + ctxt->hostname); + else + snprintf(buf, sizeof(buf), "USER %s@%s\r\n", + ctxt->user, ctxt->hostname); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if ((res == 1) || (res == 2)) { + /* we assume it worked :-\ */ + proxyType = 2; + return(0); + } + if (ctxt->passwd == NULL) + snprintf(buf, sizeof(buf), "PASS anonymous@\r\n"); + else + snprintf(buf, sizeof(buf), "PASS %s\r\n", ctxt->passwd); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, 
len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if ((res == 1) || (res == 2)) { + /* we assume it worked :-\ */ + proxyType = 2; + return(0); + } + if (proxyType == 2) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + case 3: + /* + * If you need support for other Proxy authentication scheme + * send the code or at least the sequence in use. + */ + default: + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + } + /* + * Non-proxy handling. + */ + res = xmlNanoFTPSendUser(ctxt); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + res = xmlNanoFTPGetResponse(ctxt); + switch (res) { + case 2: + return(0); + case 3: + break; + case 1: + case 4: + case 5: + case -1: + default: + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + res = xmlNanoFTPSendPasswd(ctxt); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + res = xmlNanoFTPGetResponse(ctxt); + switch (res) { + case 2: + break; + case 3: + xmlGenericError(xmlGenericErrorContext, + "FTP server asking for ACCNT on anonymous\n"); + case 1: + case 4: + case 5: + case -1: + default: + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + + return(0); +} + +/** + * xmlNanoFTPConnectTo: + * @server: an FTP server name + * @port: the port (use 21 if 0) + * + * Tries to open a control connection to the given server/port + * + * Returns an fTP context or NULL if it failed + */ + +void* +xmlNanoFTPConnectTo(const char *server, int port) { + xmlNanoFTPCtxtPtr ctxt; + int res; + + xmlNanoFTPInit(); + if (server == NULL) + return(NULL); + ctxt = (xmlNanoFTPCtxtPtr) xmlNanoFTPNewCtxt(NULL); + ctxt->hostname = 
xmlMemStrdup(server); + if (port != 0) + ctxt->port = port; + res = xmlNanoFTPConnect(ctxt); + if (res < 0) { + xmlNanoFTPFreeCtxt(ctxt); + return(NULL); + } + return(ctxt); +} + +/** + * xmlNanoFTPCwd: + * @ctx: an FTP context + * @directory: a directory on the server + * + * Tries to change the remote directory + * + * Returns -1 incase of error, 1 if CWD worked, 0 if it failed + */ + +int +xmlNanoFTPCwd(void *ctx, char *directory) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[400]; + int len; + int res; + + /* + * Expected response code for CWD: + * + * CWD + * 250 + * 500, 501, 502, 421, 530, 550 + */ + snprintf(buf, sizeof(buf), "CWD %s\r\n", directory); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) return(res); + res = xmlNanoFTPGetResponse(ctxt); + if (res == 4) { + return(-1); + } + if (res == 2) return(1); + if (res == 5) { + return(0); + } + return(0); +} + +/** + * xmlNanoFTPGetConnection: + * @ctx: an FTP context + * + * Try to open a data connection to the server. Currently only + * passive mode is supported. 
+ * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPGetConnection(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200], *cur; + int len, i; + int res; + unsigned char ad[6], *adp, *portp; + unsigned int temp[6]; + struct sockaddr_in dataAddr; + SOCKLEN_T dataAddrLen; + + ctxt->dataFd = socket (AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (ctxt->dataFd < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetConnection: failed to create socket\n"); + return(-1); + } + dataAddrLen = sizeof(dataAddr); + memset(&dataAddr, 0, dataAddrLen); + dataAddr.sin_family = AF_INET; + + if (ctxt->passive) { + snprintf(buf, sizeof(buf), "PASV\r\n"); + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPReadResponse(ctx); + if (res != 2) { + if (res == 5) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } else { + /* + * retry with an active connection + */ + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + ctxt->passive = 0; + } + } + cur = &ctxt->controlBuf[ctxt->controlBufAnswer]; + while (((*cur < '0') || (*cur > '9')) && *cur != '\0') cur++; + if (sscanf(cur, "%u,%u,%u,%u,%u,%u", &temp[0], &temp[1], &temp[2], + &temp[3], &temp[4], &temp[5]) != 6) { + xmlGenericError(xmlGenericErrorContext, + "Invalid answer to PASV\n"); + if (ctxt->dataFd != -1) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + } + return(-1); + } + for (i=0; i<6; i++) ad[i] = (unsigned char) (temp[i] & 0xff); + memcpy(&dataAddr.sin_addr, &ad[0], 4); + memcpy(&dataAddr.sin_port, &ad[4], 2); + if (connect(ctxt->dataFd, (struct sockaddr *) &dataAddr, dataAddrLen) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to create a data connection\n"); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return (-1); + } + } else { + 
getsockname(ctxt->dataFd, (struct sockaddr *) &dataAddr, &dataAddrLen); + dataAddr.sin_port = 0; + if (bind(ctxt->dataFd, (struct sockaddr *) &dataAddr, dataAddrLen) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to bind a port\n"); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return (-1); + } + getsockname(ctxt->dataFd, (struct sockaddr *) &dataAddr, &dataAddrLen); + + if (listen(ctxt->dataFd, 1) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Could not listen on port %d\n", + ntohs(dataAddr.sin_port)); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return (-1); + } + adp = (unsigned char *) &dataAddr.sin_addr; + portp = (unsigned char *) &dataAddr.sin_port; + snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n", + adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff, + portp[0] & 0xff, portp[1] & 0xff); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if (res != 2) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } + } + return(ctxt->dataFd); + +} + +/** + * xmlNanoFTPCloseConnection: + * @ctx: an FTP context + * + * Close the data connection from the server + * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPCloseConnection(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + int res; + fd_set rfd, efd; + struct timeval tv; + + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + tv.tv_sec = 15; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->controlFd, &rfd); + FD_ZERO(&efd); + FD_SET(ctxt->controlFd, &efd); + res = select(ctxt->controlFd + 1, &rfd, NULL, &efd, &tv); + if (res < 0) { +#ifdef DEBUG_FTP + perror("select"); +#endif + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + return(-1); + } + if (res == 0) { +#ifdef DEBUG_FTP + 
xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPCloseConnection: timeout\n"); +#endif + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + } else { + res = xmlNanoFTPGetResponse(ctxt); + if (res != 2) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + return(-1); + } + } + return(0); +} + +/** + * xmlNanoFTPParseList: + * @list: some data listing received from the server + * @callback: the user callback + * @userData: the user callback data + * + * Parse at most one entry from the listing. + * + * Returns -1 incase of error, the length of data parsed otherwise + */ + +static int +xmlNanoFTPParseList(const char *list, ftpListCallback callback, void *userData) { + const char *cur = list; + char filename[151]; + char attrib[11]; + char owner[11]; + char group[11]; + char month[4]; + int year = 0; + int minute = 0; + int hour = 0; + int day = 0; + unsigned long size = 0; + int links = 0; + int i; + + if (!strncmp(cur, "total", 5)) { + cur += 5; + while (*cur == ' ') cur++; + while ((*cur >= '0') && (*cur <= '9')) + links = (links * 10) + (*cur++ - '0'); + while ((*cur == ' ') || (*cur == '\n') || (*cur == '\r')) + cur++; + return(cur - list); + } else if (*list == '+') { + return(0); + } else { + while ((*cur == ' ') || (*cur == '\n') || (*cur == '\r')) + cur++; + if (*cur == 0) return(0); + i = 0; + while (*cur != ' ') { + if (i < 10) + attrib[i++] = *cur; + cur++; + if (*cur == 0) return(0); + } + attrib[10] = 0; + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + while ((*cur >= '0') && (*cur <= '9')) + links = (links * 10) + (*cur++ - '0'); + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + i = 0; + while (*cur != ' ') { + if (i < 10) + owner[i++] = *cur; + cur++; + if (*cur == 0) return(0); + } + owner[i] = 0; + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + i = 0; + while (*cur != ' ') { + if (i < 10) + group[i++] = *cur; + cur++; + if (*cur == 0) return(0); + } + group[i] = 0; + while (*cur == ' ') cur++; + if (*cur == 0) 
return(0); + while ((*cur >= '0') && (*cur <= '9')) + size = (size * 10) + (*cur++ - '0'); + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + i = 0; + while (*cur != ' ') { + if (i < 3) + month[i++] = *cur; + cur++; + if (*cur == 0) return(0); + } + month[i] = 0; + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + while ((*cur >= '0') && (*cur <= '9')) + day = (day * 10) + (*cur++ - '0'); + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + if ((cur[1] == 0) || (cur[2] == 0)) return(0); + if ((cur[1] == ':') || (cur[2] == ':')) { + while ((*cur >= '0') && (*cur <= '9')) + hour = (hour * 10) + (*cur++ - '0'); + if (*cur == ':') cur++; + while ((*cur >= '0') && (*cur <= '9')) + minute = (minute * 10) + (*cur++ - '0'); + } else { + while ((*cur >= '0') && (*cur <= '9')) + year = (year * 10) + (*cur++ - '0'); + } + while (*cur == ' ') cur++; + if (*cur == 0) return(0); + i = 0; + while ((*cur != '\n') && (*cur != '\r')) { + if (i < 150) + filename[i++] = *cur; + cur++; + if (*cur == 0) return(0); + } + filename[i] = 0; + if ((*cur != '\n') && (*cur != '\r')) + return(0); + while ((*cur == '\n') || (*cur == '\r')) + cur++; + } + if (callback != NULL) { + callback(userData, filename, attrib, owner, group, size, links, + year, month, day, hour, minute); + } + return(cur - list); +} + +/** + * xmlNanoFTPList: + * @ctx: an FTP context + * @callback: the user callback + * @userData: the user callback data + * @filename: optional files to list + * + * Do a listing on the server. All files info are passed back + * in the callbacks. 
+ * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPList(void *ctx, ftpListCallback callback, void *userData, + char *filename) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[4096 + 1]; + int len, res; + int indx = 0, base; + fd_set rfd, efd; + struct timeval tv; + + if (filename == NULL) { + if (xmlNanoFTPCwd(ctxt, ctxt->path) < 1) + return(-1); + ctxt->dataFd = xmlNanoFTPGetConnection(ctxt); + if (ctxt->dataFd == -1) + return(-1); + snprintf(buf, sizeof(buf), "LIST -L\r\n"); + } else { + if (filename[0] != '/') { + if (xmlNanoFTPCwd(ctxt, ctxt->path) < 1) + return(-1); + } + ctxt->dataFd = xmlNanoFTPGetConnection(ctxt); + if (ctxt->dataFd == -1) + return(-1); + snprintf(buf, sizeof(buf), "LIST -L %s\r\n", filename); + } + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPReadResponse(ctxt); + if (res != 1) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-res); + } + + do { + tv.tv_sec = 1; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->dataFd, &rfd); + FD_ZERO(&efd); + FD_SET(ctxt->dataFd, &efd); + res = select(ctxt->dataFd + 1, &rfd, NULL, &efd, &tv); + if (res < 0) { +#ifdef DEBUG_FTP + perror("select"); +#endif + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } + if (res == 0) { + res = xmlNanoFTPCheckResponse(ctxt); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + ctxt->dataFd = -1; + return(-1); + } + if (res == 2) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(0); + } + + continue; + } + + if ((len = recv(ctxt->dataFd, &buf[indx], sizeof(buf) - (indx + 1), 0)) < 0) { +#ifdef DEBUG_FTP + perror("recv"); +#endif + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + ctxt->dataFd = -1; + return(-1); + } +#ifdef DEBUG_FTP + write(1, &buf[indx], len); 
+#endif + indx += len; + buf[indx] = 0; + base = 0; + do { + res = xmlNanoFTPParseList(&buf[base], callback, userData); + base += res; + } while (res > 0); + + memmove(&buf[0], &buf[base], indx - base); + indx -= base; + } while (len != 0); + xmlNanoFTPCloseConnection(ctxt); + return(0); +} + +/** + * xmlNanoFTPGetSocket: + * @ctx: an FTP context + * @filename: the file to retrieve (or NULL if path is in context). + * + * Initiate fetch of the given file from the server. + * + * Returns the socket for the data connection, or <0 in case of error + */ + + +int +xmlNanoFTPGetSocket(void *ctx, const char *filename) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[300]; + int res, len; + if ((filename == NULL) && (ctxt->path == NULL)) + return(-1); + ctxt->dataFd = xmlNanoFTPGetConnection(ctxt); + if (ctxt->dataFd == -1) + return(-1); + + snprintf(buf, sizeof(buf), "TYPE I\r\n"); + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPReadResponse(ctxt); + if (res != 2) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-res); + } + if (filename == NULL) + snprintf(buf, sizeof(buf), "RETR %s\r\n", ctxt->path); + else + snprintf(buf, sizeof(buf), "RETR %s\r\n", filename); + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "%s", buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPReadResponse(ctxt); + if (res != 1) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-res); + } + return(ctxt->dataFd); +} + +/** + * xmlNanoFTPGet: + * @ctx: an FTP context + * @callback: the user callback + * @userData: the user callback data + * @filename: the file to retrieve + * + * Fetch the given file from 
the server. All data are passed back + * in the callbacks. The last callback has a size of 0 block. + * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPGet(void *ctx, ftpDataCallback callback, void *userData, + const char *filename) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[4096]; + int len = 0, res; + fd_set rfd; + struct timeval tv; + + if ((filename == NULL) && (ctxt->path == NULL)) + return(-1); + if (callback == NULL) + return(-1); + if (xmlNanoFTPGetSocket(ctxt, filename) < 0) + return(-1); + + do { + tv.tv_sec = 1; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->dataFd, &rfd); + res = select(ctxt->dataFd + 1, &rfd, NULL, NULL, &tv); + if (res < 0) { +#ifdef DEBUG_FTP + perror("select"); +#endif + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } + if (res == 0) { + res = xmlNanoFTPCheckResponse(ctxt); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + ctxt->dataFd = -1; + return(-1); + } + if (res == 2) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(0); + } + + continue; + } + if ((len = recv(ctxt->dataFd, buf, sizeof(buf), 0)) < 0) { + callback(userData, buf, len); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } + callback(userData, buf, len); + } while (len != 0); + + return(xmlNanoFTPCloseConnection(ctxt)); +} + +/** + * xmlNanoFTPRead: + * @ctx: the FTP context + * @dest: a buffer + * @len: the buffer length + * + * This function tries to read @len bytes from the existing FTP connection + * and saves them in @dest. This is a blocking call. + * + * Returns the number of byte read. 0 is an indication of an end of connection. + * -1 indicates a parameter error. 
+ */ +int +xmlNanoFTPRead(void *ctx, void *dest, int len) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + + if (ctx == NULL) return(-1); + if (ctxt->dataFd < 0) return(0); + if (dest == NULL) return(-1); + if (len <= 0) return(0); + + len = recv(ctxt->dataFd, dest, len, 0); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "Recvd %d bytes\n", len); +#endif + if (len <= 0) { + xmlNanoFTPCloseConnection(ctxt); + } + return(len); +} + +/** + * xmlNanoFTPOpen: + * @URL: the URL to the resource + * + * Start to fetch the given ftp:// resource + * + * Returns an FTP context, or NULL + */ + +void* +xmlNanoFTPOpen(const char *URL) { + xmlNanoFTPCtxtPtr ctxt; + int sock; + + xmlNanoFTPInit(); + if (URL == NULL) return(NULL); + if (strncmp("ftp://", URL, 6)) return(NULL); + + ctxt = (xmlNanoFTPCtxtPtr) xmlNanoFTPNewCtxt(URL); + if (ctxt == NULL) return(NULL); + if (xmlNanoFTPConnect(ctxt) < 0) { + xmlNanoFTPFreeCtxt(ctxt); + return(NULL); + } + sock = xmlNanoFTPGetSocket(ctxt, ctxt->path); + if (sock < 0) { + xmlNanoFTPFreeCtxt(ctxt); + return(NULL); + } + return(ctxt); +} + +/** + * xmlNanoFTPClose: + * @ctx: an FTP context + * + * Close the connection and both control and transport + * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPClose(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + + if (ctxt == NULL) + return(-1); + + if (ctxt->dataFd >= 0) { + closesocket(ctxt->dataFd); + ctxt->dataFd = -1; + } + if (ctxt->controlFd >= 0) { + xmlNanoFTPQuit(ctxt); + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + } + xmlNanoFTPFreeCtxt(ctxt); + return(0); +} + +#ifdef STANDALONE +/************************************************************************ + * * + * Basic test in Standalone mode * + * * + ************************************************************************/ +static +void ftpList(void *userData, const char *filename, const char* attrib, + const char *owner, const char *group, unsigned long size, int 
links, + int year, const char *month, int day, int hour, int minute) { + xmlGenericError(xmlGenericErrorContext, + "%s %s %s %ld %s\n", attrib, owner, group, size, filename); +} +static +void ftpData(void *userData, const char *data, int len) { + if (userData == NULL) return; + if (len <= 0) { + fclose(userData); + return; + } + fwrite(data, len, 1, userData); +} + +int main(int argc, char **argv) { + void *ctxt; + FILE *output; + char *tstfile = NULL; + + xmlNanoFTPInit(); + if (argc > 1) { + ctxt = xmlNanoFTPNewCtxt(argv[1]); + if (xmlNanoFTPConnect(ctxt) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Couldn't connect to %s\n", argv[1]); + exit(1); + } + if (argc > 2) + tstfile = argv[2]; + } else + ctxt = xmlNanoFTPConnectTo("localhost", 0); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Couldn't connect to localhost\n"); + exit(1); + } + xmlNanoFTPList(ctxt, ftpList, NULL, tstfile); + output = fopen("/tmp/tstdata", "w"); + if (output != NULL) { + if (xmlNanoFTPGet(ctxt, ftpData, (void *) output, tstfile) < 0) + xmlGenericError(xmlGenericErrorContext, + "Failed to get file\n"); + + } + xmlNanoFTPClose(ctxt); + xmlMemoryDump(); + exit(0); +} +#endif /* STANDALONE */ +#else /* !LIBXML_FTP_ENABLED */ +#ifdef STANDALONE +#include <stdio.h> +int main(int argc, char **argv) { + xmlGenericError(xmlGenericErrorContext, + "%s : FTP support not compiled in\n", argv[0]); + return(0); +} +#endif /* STANDALONE */ +#endif /* LIBXML_FTP_ENABLED */ diff --git a/bundle/libxml/nanohttp.c b/bundle/libxml/nanohttp.c new file mode 100644 index 0000000000..5a126b8868 --- /dev/null +++ b/bundle/libxml/nanohttp.c @@ -0,0 +1,1528 @@ +/* + * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets. + * focuses on size, streamability, reentrancy and portability + * + * This is clearly not a general purpose HTTP implementation + * If you look for one, check: + * http://www.w3.org/Library/ + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +/* TODO add compression support, Send the Accept- , and decompress on the + fly with ZLIB if found at compile-time */ + +#define NEED_SOCKETS +#define IN_LIBXML +#include "libxml.h" +/* Hack to get compilation to work -SH */ +#include <errno.h> + +#ifdef LIBXML_HTTP_ENABLED +#include <string.h> + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> +#endif +#ifdef HAVE_NETINET_IN_H +#include <netinet/in.h> +#endif +#ifdef HAVE_ARPA_INET_H +#include <arpa/inet.h> +#endif +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif +#ifdef HAVE_RESOLV_H +#ifdef HAVE_ARPA_NAMESER_H +#include <arpa/nameser.h> +#endif +#include <resolv.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#ifdef SUPPORT_IP6 +#include <resolv.h> +#endif + +#ifdef VMS +#include <stropts> +#define SOCKLEN_T unsigned int +#define SOCKET int +#endif + +#include <libxml/globals.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> /* for xmlStr(n)casecmp() */ +#include <libxml/nanohttp.h> +#include <libxml/globals.h> +#include <libxml/uri.h> + +/** + * A couple portability macros + */ +#ifndef _WINSOCKAPI_ +#define closesocket(s) close(s) +#define SOCKET int +#endif + +#ifndef SOCKLEN_T +#define SOCKLEN_T unsigned int +#endif +#ifndef SOCKET +#define SOCKET int +#endif + +#ifdef STANDALONE +#define DEBUG_HTTP +#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n) +#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b) +#endif + +#define XML_NANO_HTTP_MAX_REDIR 10 + +#define XML_NANO_HTTP_CHUNK 4096 + +#define XML_NANO_HTTP_CLOSED 0 +#define XML_NANO_HTTP_WRITE 1 +#define XML_NANO_HTTP_READ 2 
+#define XML_NANO_HTTP_NONE 4 + +typedef struct xmlNanoHTTPCtxt { + char *protocol; /* the protocol name */ + char *hostname; /* the host name */ + int port; /* the port */ + char *path; /* the path within the URL */ + SOCKET fd; /* the file descriptor for the socket */ + int state; /* WRITE / READ / CLOSED */ + char *out; /* buffer sent (zero terminated) */ + char *outptr; /* index within the buffer sent */ + char *in; /* the receiving buffer */ + char *content; /* the start of the content */ + char *inptr; /* the next byte to read from network */ + char *inrptr; /* the next byte to give back to the client */ + int inlen; /* len of the input buffer */ + int last; /* return code for last operation */ + int returnValue; /* the protocol return value */ + int ContentLength; /* specified content length from HTTP header */ + char *contentType; /* the MIME type for the input */ + char *location; /* the new URL in case of redirect */ + char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */ +} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr; + +static int initialized = 0; +static char *proxy = NULL; /* the proxy name if any */ +static int proxyPort; /* the proxy port if any */ +static unsigned int timeout = 60;/* the select() timeout in seconds */ + +int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ); +int xmlNanoHTTPContentLength( void * ctx ); + +/** + * A portability function + */ +static int socket_errno(void) { +#ifdef _WINSOCKAPI_ + return(WSAGetLastError()); +#else + return(errno); +#endif +} + +/** + * xmlNanoHTTPInit: + * + * Initialize the HTTP protocol layer. 
+ * Currently it just checks for proxy informations + */ + +void +xmlNanoHTTPInit(void) { + const char *env; +#ifdef _WINSOCKAPI_ + WSADATA wsaData; +#endif + + if (initialized) + return; + +#ifdef _WINSOCKAPI_ + if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0) + return; +#endif + + if (proxy == NULL) { + proxyPort = 80; + env = getenv("no_proxy"); + if (env != NULL) + goto done; + env = getenv("http_proxy"); + if (env != NULL) { + xmlNanoHTTPScanProxy(env); + goto done; + } + env = getenv("HTTP_PROXY"); + if (env != NULL) { + xmlNanoHTTPScanProxy(env); + goto done; + } + } +done: + initialized = 1; +} + +/** + * xmlNanoHTTPCleanup: + * + * Cleanup the HTTP protocol layer. + */ + +void +xmlNanoHTTPCleanup(void) { + if (proxy != NULL) + xmlFree(proxy); +#ifdef _WINSOCKAPI_ + if (initialized) + WSACleanup(); +#endif + initialized = 0; + return; +} + +/** + * xmlNanoHTTPScanURL: + * @ctxt: an HTTP context + * @URL: The URL used to initialize the context + * + * (Re)Initialize an HTTP context by parsing the URL and finding + * the protocol host port and path it indicates. 
+ */ + +static void +xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) { + const char *cur = URL; + char buf[4096]; + int indx = 0; + int port = 0; + + if (ctxt->protocol != NULL) { + xmlFree(ctxt->protocol); + ctxt->protocol = NULL; + } + if (ctxt->hostname != NULL) { + xmlFree(ctxt->hostname); + ctxt->hostname = NULL; + } + if (ctxt->path != NULL) { + xmlFree(ctxt->path); + ctxt->path = NULL; + } + if (URL == NULL) return; + buf[indx] = 0; + while (*cur != 0) { + if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) { + buf[indx] = 0; + ctxt->protocol = xmlMemStrdup(buf); + indx = 0; + cur += 3; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) return; + + buf[indx] = 0; + while (1) { + if (cur[0] == ':') { + buf[indx] = 0; + ctxt->hostname = xmlMemStrdup(buf); + indx = 0; + cur += 1; + while ((*cur >= '0') && (*cur <= '9')) { + port *= 10; + port += *cur - '0'; + cur++; + } + if (port != 0) ctxt->port = port; + while ((cur[0] != '/') && (*cur != 0)) + cur++; + break; + } + if ((*cur == '/') || (*cur == 0)) { + buf[indx] = 0; + ctxt->hostname = xmlMemStrdup(buf); + indx = 0; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) + ctxt->path = xmlMemStrdup("/"); + else { + indx = 0; + buf[indx] = 0; + while (*cur != 0) + buf[indx++] = *cur++; + buf[indx] = 0; + ctxt->path = xmlMemStrdup(buf); + } +} + +/** + * xmlNanoHTTPScanProxy: + * @URL: The proxy URL used to initialize the proxy context + * + * (Re)Initialize the HTTP Proxy context by parsing the URL and finding + * the protocol host port it indicates. + * Should be like http://myproxy/ or http://myproxy:3128/ + * A NULL URL cleans up proxy informations. 
+ */ + +void +xmlNanoHTTPScanProxy(const char *URL) { + const char *cur = URL; + char buf[4096]; + int indx = 0; + int port = 0; + + if (proxy != NULL) { + xmlFree(proxy); + proxy = NULL; + } + if (proxyPort != 0) { + proxyPort = 0; + } +#ifdef DEBUG_HTTP + if (URL == NULL) + xmlGenericError(xmlGenericErrorContext, + "Removing HTTP proxy info\n"); + else + xmlGenericError(xmlGenericErrorContext, + "Using HTTP proxy %s\n", URL); +#endif + if (URL == NULL) return; + buf[indx] = 0; + while (*cur != 0) { + if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) { + buf[indx] = 0; + indx = 0; + cur += 3; + break; + } + buf[indx++] = *cur++; + } + if (*cur == 0) return; + + buf[indx] = 0; + while (1) { + if (cur[0] == ':') { + buf[indx] = 0; + proxy = xmlMemStrdup(buf); + indx = 0; + cur += 1; + while ((*cur >= '0') && (*cur <= '9')) { + port *= 10; + port += *cur - '0'; + cur++; + } + if (port != 0) proxyPort = port; + while ((cur[0] != '/') && (*cur != 0)) + cur++; + break; + } + if ((*cur == '/') || (*cur == 0)) { + buf[indx] = 0; + proxy = xmlMemStrdup(buf); + indx = 0; + break; + } + buf[indx++] = *cur++; + } +} + +/** + * xmlNanoHTTPNewCtxt: + * @URL: The URL used to initialize the context + * + * Allocate and initialize a new HTTP context. + * + * Returns an HTTP context or NULL in case of error. 
+ */ + +static xmlNanoHTTPCtxtPtr +xmlNanoHTTPNewCtxt(const char *URL) { + xmlNanoHTTPCtxtPtr ret; + xmlChar *escaped; + + ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt)); + if (ret == NULL) return(NULL); + + memset(ret, 0, sizeof(xmlNanoHTTPCtxt)); + ret->port = 80; + ret->returnValue = 0; + ret->fd = -1; + ret->ContentLength = -1; + + escaped = xmlURIEscapeStr(BAD_CAST URL, BAD_CAST"@/:=?;#%&"); + if (escaped != NULL) { + xmlNanoHTTPScanURL(ret, (const char *) escaped); + xmlFree(escaped); + } else { + xmlNanoHTTPScanURL(ret, URL); + } + + return(ret); +} + +/** + * xmlNanoHTTPFreeCtxt: + * @ctxt: an HTTP context + * + * Frees the context after closing the connection. + */ + +static void +xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) { + if (ctxt == NULL) return; + if (ctxt->hostname != NULL) xmlFree(ctxt->hostname); + if (ctxt->protocol != NULL) xmlFree(ctxt->protocol); + if (ctxt->path != NULL) xmlFree(ctxt->path); + if (ctxt->out != NULL) xmlFree(ctxt->out); + if (ctxt->in != NULL) xmlFree(ctxt->in); + if (ctxt->contentType != NULL) xmlFree(ctxt->contentType); + if (ctxt->location != NULL) xmlFree(ctxt->location); + if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader); + ctxt->state = XML_NANO_HTTP_NONE; + if (ctxt->fd >= 0) closesocket(ctxt->fd); + ctxt->fd = -1; + xmlFree(ctxt); +} + +/** + * xmlNanoHTTPSend: + * @ctxt: an HTTP context + * + * Send the input needed to initiate the processing on the server side + * Returns number of bytes sent or -1 on error. 
+ */ + +static int +xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char * xmt_ptr, int outlen) { + + int total_sent = 0; + + if ( (ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL ) ) { + while (total_sent < outlen) { + int nsent = send(ctxt->fd, xmt_ptr + total_sent, + outlen - total_sent, 0); + if (nsent>0) + total_sent += nsent; + else if ( ( nsent == -1 ) && +#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK + ( socket_errno( ) != EAGAIN ) && +#endif + ( socket_errno( ) != EWOULDBLOCK ) ) { + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPSend error: %s", + strerror( socket_errno( ) ) ); + + if ( total_sent == 0 ) + total_sent = -1; + break; + } + else { + /* + ** No data sent + ** Since non-blocking sockets are used, wait for + ** socket to be writable or default timeout prior + ** to retrying. + */ + + struct timeval tv; + fd_set wfd; + + tv.tv_sec = timeout; + tv.tv_usec = 0; + FD_ZERO( &wfd ); + FD_SET( ctxt->fd, &wfd ); + (void)select( ctxt->fd + 1, NULL, &wfd, NULL, &tv ); + } + } + } + + return total_sent; +} + +/** + * xmlNanoHTTPRecv: + * @ctxt: an HTTP context + * + * Read information coming from the HTTP connection. + * This is a blocking call (but it blocks in select(), not read()). + * + * Returns the number of byte read or -1 in case of error. + */ + +static int +xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) { + fd_set rfd; + struct timeval tv; + + + while (ctxt->state & XML_NANO_HTTP_READ) { + if (ctxt->in == NULL) { + ctxt->in = (char *) xmlMalloc(65000 * sizeof(char)); + if (ctxt->in == NULL) { + ctxt->last = -1; + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPRecv: Error allocating input memory." 
); + return(-1); + } + ctxt->inlen = 65000; + ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in; + } + if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) { + int delta = ctxt->inrptr - ctxt->in; + int len = ctxt->inptr - ctxt->inrptr; + + memmove(ctxt->in, ctxt->inrptr, len); + ctxt->inrptr -= delta; + ctxt->content -= delta; + ctxt->inptr -= delta; + } + if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) { + int d_inptr = ctxt->inptr - ctxt->in; + int d_content = ctxt->content - ctxt->in; + int d_inrptr = ctxt->inrptr - ctxt->in; + char * tmp_ptr = ctxt->in; + + ctxt->inlen *= 2; + ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen); + if (ctxt->in == NULL) { + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPRecv: %s %d bytes.", + "Failed to realloc input buffer to", + ctxt->inlen ); + xmlFree( tmp_ptr ); + ctxt->last = -1; + return(-1); + } + ctxt->inptr = ctxt->in + d_inptr; + ctxt->content = ctxt->in + d_content; + ctxt->inrptr = ctxt->in + d_inrptr; + } + ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0); + if (ctxt->last > 0) { + ctxt->inptr += ctxt->last; + return(ctxt->last); + } + if (ctxt->last == 0) { + return(0); + } + if (ctxt->last == -1) { + switch (socket_errno()) { + case EINPROGRESS: + case EWOULDBLOCK: +#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK + case EAGAIN: +#endif + break; + + case ECONNRESET: + case ESHUTDOWN: + return ( 0 ); + + default: + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPRecv: recv( ) failure - %s", + strerror( socket_errno( ) ) ); + return(-1); + } + } + + tv.tv_sec = timeout; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->fd, &rfd); + + if ( (select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1) +#if defined(EINTR) + && (errno != EINTR) +#endif + ) + return(0); + } + return(0); +} + +/** + * xmlNanoHTTPReadLine: + * @ctxt: an HTTP context + * + * Read one line in the HTTP server output, usually for extracting + * the HTTP protocol informations from the answer header. 
+ * + * Returns a newly allocated string with a copy of the line, or NULL + * which indicate the end of the input. + */ + +static char * +xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) { + char buf[4096]; + char *bp = buf; + int rc; + + while (bp - buf < 4095) { + if (ctxt->inrptr == ctxt->inptr) { + if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) { + if (bp == buf) + return(NULL); + else + *bp = 0; + return(xmlMemStrdup(buf)); + } + else if ( rc == -1 ) { + return ( NULL ); + } + } + *bp = *ctxt->inrptr++; + if (*bp == '\n') { + *bp = 0; + return(xmlMemStrdup(buf)); + } + if (*bp != '\r') + bp++; + } + buf[4095] = 0; + return(xmlMemStrdup(buf)); +} + + +/** + * xmlNanoHTTPScanAnswer: + * @ctxt: an HTTP context + * @line: an HTTP header line + * + * Try to extract useful informations from the server answer. + * We currently parse and process: + * - The HTTP revision/ return code + * - The Content-Type + * - The Location for redirect processing. + * + * Returns -1 in case of failure, the file descriptor number otherwise + */ + +static void +xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) { + const char *cur = line; + + if (line == NULL) return; + + if (!strncmp(line, "HTTP/", 5)) { + int version = 0; + int ret = 0; + + cur += 5; + while ((*cur >= '0') && (*cur <= '9')) { + version *= 10; + version += *cur - '0'; + cur++; + } + if (*cur == '.') { + cur++; + if ((*cur >= '0') && (*cur <= '9')) { + version *= 10; + version += *cur - '0'; + cur++; + } + while ((*cur >= '0') && (*cur <= '9')) + cur++; + } else + version *= 10; + if ((*cur != ' ') && (*cur != '\t')) return; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if ((*cur < '0') || (*cur > '9')) return; + while ((*cur >= '0') && (*cur <= '9')) { + ret *= 10; + ret += *cur - '0'; + cur++; + } + if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return; + ctxt->returnValue = ret; + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) { + cur += 13; + while ((*cur == ' ') || (*cur == 
'\t')) cur++; + if (ctxt->contentType != NULL) + xmlFree(ctxt->contentType); + ctxt->contentType = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) { + cur += 12; + if (ctxt->contentType != NULL) return; + while ((*cur == ' ') || (*cur == '\t')) cur++; + ctxt->contentType = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) { + cur += 9; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->location != NULL) + xmlFree(ctxt->location); + ctxt->location = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) { + cur += 17; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->authHeader != NULL) + xmlFree(ctxt->authHeader); + ctxt->authHeader = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) { + cur += 19; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->authHeader != NULL) + xmlFree(ctxt->authHeader); + ctxt->authHeader = xmlMemStrdup(cur); + } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) { + cur += 15; + ctxt->ContentLength = strtol( cur, NULL, 10 ); + } +} + +/** + * xmlNanoHTTPConnectAttempt: + * @addr: a socket address structure + * + * Attempt a connection to the given IP:port endpoint. It forces + * non-blocking semantic on the socket, and allow 60 seconds for + * the host to answer. 
+ * + * Returns -1 in case of failure, the file descriptor number otherwise + */ + +static int +xmlNanoHTTPConnectAttempt(struct sockaddr *addr) +{ + SOCKET s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + fd_set wfd; + struct timeval tv; + int status; + + if (s==-1) { +#ifdef DEBUG_HTTP + perror("socket"); +#endif + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s - %s", + "socket creation failure", + strerror( socket_errno( ) ) ); + return(-1); + } + +#ifdef _WINSOCKAPI_ + { + u_long one = 1; + + status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0; + } +#else /* _WINSOCKAPI_ */ +#if defined(VMS) + { + int enable = 1; + status = ioctl(s, FIONBIO, &enable); + } +#else /* VMS */ + if ((status = fcntl(s, F_GETFL, 0)) != -1) { +#ifdef O_NONBLOCK + status |= O_NONBLOCK; +#else /* O_NONBLOCK */ +#ifdef F_NDELAY + status |= F_NDELAY; +#endif /* F_NDELAY */ +#endif /* !O_NONBLOCK */ + status = fcntl(s, F_SETFL, status); + } + if (status < 0) { +#ifdef DEBUG_HTTP + perror("nonblocking"); +#endif + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s - %s", + "error setting non-blocking IO", + strerror( socket_errno( ) ) ); + closesocket(s); + return(-1); + } +#endif /* !VMS */ +#endif /* !_WINSOCKAPI_ */ + + if ((connect(s, addr, sizeof(*addr))==-1)) { + switch (socket_errno()) { + case EINPROGRESS: + case EWOULDBLOCK: + break; + default: + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s - %s", + "error connecting to HTTP server", + strerror( socket_errno( ) ) ); + closesocket(s); + return(-1); + } + } + + tv.tv_sec = timeout; + tv.tv_usec = 0; + + FD_ZERO(&wfd); + FD_SET(s, &wfd); + + switch(select(s+1, NULL, &wfd, NULL, &tv)) + { + case 0: + /* Time out */ + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s", + "Connect attempt timed out." ); + closesocket(s); + return(-1); + case -1: + /* Ermm.. ?? 
*/ + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s - %s", + "Error connecting to host", + strerror( socket_errno( ) ) ); + closesocket(s); + return(-1); + } + + if ( FD_ISSET(s, &wfd) ) { + SOCKLEN_T len; + len = sizeof(status); + if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&status, &len) < 0 ) { + /* Solaris error code */ + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s - %s", + "Error retrieving pending socket errors", + strerror( socket_errno( ) ) ); + return (-1); + } + if ( status ) { + closesocket(s); + errno = status; + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s - %s", + "Error connecting to remote host", + strerror( status ) ); + return (-1); + } + } else { + /* pbm */ + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPConnectAttempt: %s\n", + "Select returned, but descriptor not set for connection.\n" ); + closesocket(s); + return (-1); + } + + return(s); +} + +/** + * xmlNanoHTTPConnectHost: + * @host: the host name + * @port: the port number + * + * Attempt a connection to the given host:port endpoint. It tries + * the multiple IP provided by the DNS if available. + * + * Returns -1 in case of failure, the file descriptor number otherwise + */ + +static int +xmlNanoHTTPConnectHost(const char *host, int port) +{ + struct hostent *h; + struct sockaddr *addr; + struct in_addr ia; + struct sockaddr_in sockin; + +#ifdef SUPPORT_IP6 + struct in6_addr ia6; + struct sockaddr_in6 sockin6; +#endif + int i; + int s; + +#if defined(SUPPORT_IP6) && defined(RES_USE_INET6) + if (!(_res.options & RES_INIT)) + res_init(); + _res.options |= RES_USE_INET6; +#endif + h = gethostbyname(host); + if (h == NULL) { + +/* + * Okay, I got fed up by the non-portability of this error message + * extraction code. 
it work on Linux, if it work on your platform + * and one want to enable it, send me the defined(foobar) needed + */ +#if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(linux) + const char *h_err_txt = ""; + + switch (h_errno) { + case HOST_NOT_FOUND: + h_err_txt = "Authoritive host not found"; + break; + + case TRY_AGAIN: + h_err_txt = + "Non-authoritive host not found or server failure."; + break; + + case NO_RECOVERY: + h_err_txt = + "Non-recoverable errors: FORMERR, REFUSED, or NOTIMP."; + break; + + case NO_ADDRESS: + h_err_txt = + "Valid name, no data record of requested type."; + break; + + default: + h_err_txt = "No error text defined."; + break; + } + xmlGenericError(xmlGenericErrorContext, + "xmlNanoHTTPConnectHost: %s '%s' - %s", + "Failed to resolve host", host, h_err_txt); +#else + xmlGenericError(xmlGenericErrorContext, + "xmlNanoHTTPConnectHost: %s '%s'", + "Failed to resolve host", host); +#endif + return (-1); + } + + for (i = 0; h->h_addr_list[i]; i++) { + if (h->h_addrtype == AF_INET) { + /* A records (IPv4) */ + memcpy(&ia, h->h_addr_list[i], h->h_length); + sockin.sin_family = h->h_addrtype; + sockin.sin_addr = ia; + sockin.sin_port = htons(port); + addr = (struct sockaddr *) &sockin; +#ifdef SUPPORT_IP6 + } else if (h->h_addrtype == AF_INET6) { + /* AAAA records (IPv6) */ + memcpy(&ia6, h->h_addr_list[i], h->h_length); + sockin6.sin_family = h->h_addrtype; + sockin6.sin_addr = ia6; + sockin6.sin_port = htons(port); + addr = (struct sockaddr *) &sockin6; +#endif + } else + break; /* for */ + + s = xmlNanoHTTPConnectAttempt(addr); + if (s != -1) + return (s); + } + +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoHTTPConnectHost: unable to connect to '%s'.\n", + host); +#endif + return (-1); +} + + +/** + * xmlNanoHTTPOpen: + * @URL: The URL to load + * @contentType: if available the Content-Type information will be + * returned at that location + * + * This function try to open a connection to the indicated 
resource + * via HTTP GET. + * + * Returns NULL in case of failure, otherwise a request handler. + * The contentType, if provided must be freed by the caller + */ + +void* +xmlNanoHTTPOpen(const char *URL, char **contentType) { + if (contentType != NULL) *contentType = NULL; + return(xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0)); +} + +/** + * xmlNanoHTTPOpenRedir: + * @URL: The URL to load + * @contentType: if available the Content-Type information will be + * returned at that location + * @redir: if available the redirected URL will be returned + * + * This function try to open a connection to the indicated resource + * via HTTP GET. + * + * Returns NULL in case of failure, otherwise a request handler. + * The contentType, if provided must be freed by the caller + */ + +void* +xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) { + if (contentType != NULL) *contentType = NULL; + if (redir != NULL) *redir = NULL; + return(xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir, NULL,0)); +} + +/** + * xmlNanoHTTPRead: + * @ctx: the HTTP context + * @dest: a buffer + * @len: the buffer length + * + * This function tries to read @len bytes from the existing HTTP connection + * and saves them in @dest. This is a blocking call. + * + * Returns the number of byte read. 0 is an indication of an end of connection. + * -1 indicates a parameter error. 
+ */ +int +xmlNanoHTTPRead(void *ctx, void *dest, int len) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctx == NULL) return(-1); + if (dest == NULL) return(-1); + if (len <= 0) return(0); + + while (ctxt->inptr - ctxt->inrptr < len) { + if (xmlNanoHTTPRecv(ctxt) <= 0) break; + } + if (ctxt->inptr - ctxt->inrptr < len) + len = ctxt->inptr - ctxt->inrptr; + memcpy(dest, ctxt->inrptr, len); + ctxt->inrptr += len; + return(len); +} + +/** + * xmlNanoHTTPClose: + * @ctx: the HTTP context + * + * This function closes an HTTP context, it ends up the connection and + * free all data related to it. + */ +void +xmlNanoHTTPClose(void *ctx) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctx == NULL) return; + + xmlNanoHTTPFreeCtxt(ctxt); +} + +/** + * xmlNanoHTTPMethodRedir: + * @URL: The URL to load + * @method: the HTTP method to use + * @input: the input string if any + * @contentType: the Content-Type information IN and OUT + * @redir: the redirected URL OUT + * @headers: the extra headers + * @ilen: input length + * + * This function try to open a connection to the indicated resource + * via HTTP using the given @method, adding the given extra headers + * and the input buffer for the request content. + * + * Returns NULL in case of failure, otherwise a request handler. 
+ * The contentType, or redir, if provided must be freed by the caller + */ + +void* +xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input, + char **contentType, char **redir, + const char *headers, int ilen ) { + xmlNanoHTTPCtxtPtr ctxt; + char *bp, *p; + int blen, ret; + int head; + int xmt_bytes; + int nbRedirects = 0; + char *redirURL = NULL; + + if (URL == NULL) return(NULL); + if (method == NULL) method = "GET"; + xmlNanoHTTPInit(); + +retry: + if (redirURL == NULL) + ctxt = xmlNanoHTTPNewCtxt(URL); + else { + ctxt = xmlNanoHTTPNewCtxt(redirURL); + } + + if ( ctxt == NULL ) { + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: %s %s.", + "Unable to allocate HTTP context to URI", + ( ( redirURL == NULL ) ? URL : redirURL ) ); + return ( NULL ); + } + + if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) { + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: %s - %s.", + "Not a valid HTTP URI", + ( ( redirURL == NULL ) ? URL : redirURL ) ); + xmlNanoHTTPFreeCtxt(ctxt); + if (redirURL != NULL) xmlFree(redirURL); + return(NULL); + } + if (ctxt->hostname == NULL) { + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: %s - %s", + "Failed to identify host in URI", + ( ( redirURL == NULL ) ? 
URL : redirURL ) ); + xmlNanoHTTPFreeCtxt(ctxt); + if (redirURL != NULL) xmlFree(redirURL); + return(NULL); + } + if (proxy) { + blen = strlen(ctxt->hostname) * 2 + 16; + ret = xmlNanoHTTPConnectHost(proxy, proxyPort); + } + else { + blen = strlen(ctxt->hostname); + ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port); + } + if (ret < 0) { + xmlNanoHTTPFreeCtxt(ctxt); + if (redirURL != NULL) xmlFree(redirURL); + return(NULL); + } + ctxt->fd = ret; + + if (input == NULL) + ilen = 0; + else + blen += 36; + + if (headers != NULL) + blen += strlen(headers) + 2; + if (contentType && *contentType) + blen += strlen(*contentType) + 16; + blen += strlen(method) + strlen(ctxt->path) + 24; + bp = xmlMalloc(blen); + if ( bp == NULL ) { + xmlNanoHTTPFreeCtxt( ctxt ); + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: %s", + "Error allocating HTTP header buffer." ); + return ( NULL ); + } + + p = bp; + + if (proxy) { + if (ctxt->port != 80) { + p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s", + method, ctxt->hostname, + ctxt->port, ctxt->path ); + } + else + p += snprintf( p, blen - (p - bp), "%s http://%s%s", method, + ctxt->hostname, ctxt->path); + } + else + p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path); + + p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n", + ctxt->hostname); + + if (contentType != NULL && *contentType) + p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType); + + if (headers != NULL) + p += snprintf( p, blen - (p - bp), "%s", headers ); + + if (input != NULL) + snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen ); + else + snprintf(p, blen - (p - bp), "\r\n"); + +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "-> %s%s", proxy? 
"(Proxy) " : "", bp); + if ((blen -= strlen(bp)+1) < 0) + xmlGenericError(xmlGenericErrorContext, + "ERROR: overflowed buffer by %d bytes\n", -blen); +#endif + ctxt->outptr = ctxt->out = bp; + ctxt->state = XML_NANO_HTTP_WRITE; + blen = strlen( ctxt->out ); + xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen ); +#ifdef DEBUG_HTTP + if ( xmt_bytes != blen ) + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n", + xmt_bytes, blen, + "bytes of HTTP headers sent to host", + ctxt->hostname ); +#endif + + if ( input != NULL ) { + xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen ); + +#ifdef DEBUG_HTTP + if ( xmt_bytes != ilen ) + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n", + xmt_bytes, ilen, + "bytes of HTTP content sent to host", + ctxt->hostname ); +#endif + } + + ctxt->state = XML_NANO_HTTP_READ; + head = 1; + + while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) { + if (head && (*p == 0)) { + head = 0; + ctxt->content = ctxt->inrptr; + xmlFree(p); + break; + } + xmlNanoHTTPScanAnswer(ctxt, p); + +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, "<- %s\n", p); +#endif + xmlFree(p); + } + + if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) && + (ctxt->returnValue < 400)) { +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "\nRedirect to: %s\n", ctxt->location); +#endif + while ( xmlNanoHTTPRecv(ctxt) > 0 ) ; + if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) { + nbRedirects++; + if (redirURL != NULL) + xmlFree(redirURL); + redirURL = xmlMemStrdup(ctxt->location); + xmlNanoHTTPFreeCtxt(ctxt); + goto retry; + } + xmlNanoHTTPFreeCtxt(ctxt); + if (redirURL != NULL) xmlFree(redirURL); +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n"); +#endif + return(NULL); + } + + if (contentType != NULL) { + if (ctxt->contentType != NULL) + *contentType = xmlMemStrdup(ctxt->contentType); + else + 
*contentType = NULL; + } + + if ((redir != NULL) && (redirURL != NULL)) { + *redir = redirURL; + } else { + if (redirURL != NULL) + xmlFree(redirURL); + if (redir != NULL) + *redir = NULL; + } + +#ifdef DEBUG_HTTP + if (ctxt->contentType != NULL) + xmlGenericError(xmlGenericErrorContext, + "\nCode %d, content-type '%s'\n\n", + ctxt->returnValue, ctxt->contentType); + else + xmlGenericError(xmlGenericErrorContext, + "\nCode %d, no content-type\n\n", + ctxt->returnValue); +#endif + + return((void *) ctxt); +} + +/** + * xmlNanoHTTPMethod: + * @URL: The URL to load + * @method: the HTTP method to use + * @input: the input string if any + * @contentType: the Content-Type information IN and OUT + * @headers: the extra headers + * @ilen: input length + * + * This function try to open a connection to the indicated resource + * via HTTP using the given @method, adding the given extra headers + * and the input buffer for the request content. + * + * Returns NULL in case of failure, otherwise a request handler. + * The contentType, if provided must be freed by the caller + */ + +void* +xmlNanoHTTPMethod(const char *URL, const char *method, const char *input, + char **contentType, const char *headers, int ilen) { + return(xmlNanoHTTPMethodRedir(URL, method, input, contentType, + NULL, headers, ilen)); +} + +/** + * xmlNanoHTTPFetch: + * @URL: The URL to load + * @filename: the filename where the content should be saved + * @contentType: if available the Content-Type information will be + * returned at that location + * + * This function try to fetch the indicated resource via HTTP GET + * and save it's content in the file. + * + * Returns -1 in case of failure, 0 incase of success. 
The contentType, + * if provided must be freed by the caller + */ +int +xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) { + void *ctxt = NULL; + char *buf = NULL; + int fd; + int len; + + ctxt = xmlNanoHTTPOpen(URL, contentType); + if (ctxt == NULL) return(-1); + + if (!strcmp(filename, "-")) + fd = 0; + else { + fd = open(filename, O_CREAT | O_WRONLY, 00644); + if (fd < 0) { + xmlNanoHTTPClose(ctxt); + if ((contentType != NULL) && (*contentType != NULL)) { + xmlFree(*contentType); + *contentType = NULL; + } + return(-1); + } + } + + xmlNanoHTTPFetchContent( ctxt, &buf, &len ); + if ( len > 0 ) { + write(fd, buf, len); + } + + xmlNanoHTTPClose(ctxt); + close(fd); + return(0); +} + +/** + * xmlNanoHTTPSave: + * @ctxt: the HTTP context + * @filename: the filename where the content should be saved + * + * This function saves the output of the HTTP transaction to a file + * It closes and free the context at the end + * + * Returns -1 in case of failure, 0 incase of success. + */ +int +xmlNanoHTTPSave(void *ctxt, const char *filename) { + char *buf = NULL; + int fd; + int len; + + if (ctxt == NULL) return(-1); + + if (!strcmp(filename, "-")) + fd = 0; + else { + fd = open(filename, O_CREAT | O_WRONLY); + if (fd < 0) { + xmlNanoHTTPClose(ctxt); + return(-1); + } + } + + xmlNanoHTTPFetchContent( ctxt, &buf, &len ); + if ( len > 0 ) { + write(fd, buf, len); + } + + xmlNanoHTTPClose(ctxt); + return(0); +} + +/** + * xmlNanoHTTPReturnCode: + * @ctx: the HTTP context + * + * Get the latest HTTP return code received + * + * Returns the HTTP return code for the request. + */ +int +xmlNanoHTTPReturnCode(void *ctx) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctxt == NULL) return(-1); + + return(ctxt->returnValue); +} + +/** + * xmlNanoHTTPAuthHeader: + * @ctx: the HTTP context + * + * Get the authentication header of an HTTP context + * + * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate + * header. 
+ */ +const char * +xmlNanoHTTPAuthHeader(void *ctx) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctxt == NULL) return(NULL); + + return(ctxt->authHeader); +} + +/** + * xmlNanoHTTPContentLength: + * @ctx: the HTTP context + * + * Provides the specified content length from the HTTP header. + * + * Return the specified content length from the HTTP header. Note that + * a value of -1 indicates that the content length element was not included in + * the response header. + */ +int +xmlNanoHTTPContentLength( void * ctx ) { + xmlNanoHTTPCtxtPtr ctxt = ctx; + + return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength ); +} + +/** + * xmlNanoHTTPFetchContent: + * @ctx: the HTTP context + * @ptr: pointer to set to the content buffer. + * @len: integer pointer to hold the length of the content + * + * Check if all the content was read + * + * Returns 0 if all the content was read and available, returns + * -1 if received content length was less than specified or an error + * occurred. 
+ */ +int +xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) { + xmlNanoHTTPCtxtPtr ctxt = ctx; + + int rc = 0; + int cur_lgth; + int rcvd_lgth; + int dummy_int; + char * dummy_ptr = NULL; + + /* Dummy up return input parameters if not provided */ + + if ( len == NULL ) + len = &dummy_int; + + if ( ptr == NULL ) + ptr = &dummy_ptr; + + /* But can't work without the context pointer */ + + if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) { + *len = 0; + *ptr = NULL; + return ( -1 ); + } + + rcvd_lgth = ctxt->inptr - ctxt->content; + + while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) { + + rcvd_lgth += cur_lgth; + if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) ) + break; + } + + *ptr = ctxt->content; + *len = rcvd_lgth; + + if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) ) + rc = -1; + else if ( rcvd_lgth == 0 ) + rc = -1; + + return ( rc ); +} + +#ifdef STANDALONE +int main(int argc, char **argv) { + char *contentType = NULL; + + if (argv[1] != NULL) { + if (argv[2] != NULL) + xmlNanoHTTPFetch(argv[1], argv[2], &contentType); + else + xmlNanoHTTPFetch(argv[1], "-", &contentType); + if (contentType != NULL) xmlFree(contentType); + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: minimal HTTP GET implementation\n", argv[0]); + xmlGenericError(xmlGenericErrorContext, + "\tusage %s [ URL [ filename ] ]\n", argv[0]); + } + xmlNanoHTTPCleanup(); + xmlMemoryDump(); + return(0); +} +#endif /* STANDALONE */ +#else /* !LIBXML_HTTP_ENABLED */ +#ifdef STANDALONE +#include <stdio.h> +int main(int argc, char **argv) { + xmlGenericError(xmlGenericErrorContext, + "%s : HTTP support not compiled in\n", argv[0]); + return(0); +} +#endif /* STANDALONE */ +#endif /* LIBXML_HTTP_ENABLED */ diff --git a/bundle/libxml/parser.c b/bundle/libxml/parser.c new file mode 100644 index 0000000000..66908264fc --- /dev/null +++ b/bundle/libxml/parser.c @@ -0,0 +1,10912 @@ +/* + * parser.c : an XML 1.0 parser, namespaces and 
validity support are mostly + * implemented on top of the SAX interfaces + * + * References: + * The XML specification: + * http://www.w3.org/TR/REC-xml + * Original 1.0 version: + * http://www.w3.org/TR/1998/REC-xml-19980210 + * XML second edition working draft + * http://www.w3.org/TR/2000/WD-xml-2e-20000814 + * + * Okay this is a big file, the parser core is around 7000 lines, then it + * is followed by the progressive parser top routines, then the various + * high level APIs to call the parser and a few miscellaneous functions. + * A number of helper functions and deprecated ones have been moved to + * parserInternals.c to reduce this file size. + * As much as possible the functions are associated with their relative + * production in the XML specification. A few productions defining the + * different ranges of character are actually implanted either in + * parserInternals.h or parserInternals.c + * The DOM tree build is realized from the default SAX callbacks in + * the module SAX.c. + * The routines doing the validation checks are in valid.c and called either + * from the SAX callbacks or as standalone functions using a preparsed + * document. + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#if defined(WIN32) && !defined (__CYGWIN__) +#define XML_DIR_SEP '\\' +#else +#define XML_DIR_SEP '/' +#endif + +#include <stdlib.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/threads.h> +#include <libxml/globals.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/valid.h> +#include <libxml/entities.h> +#include <libxml/xmlerror.h> +#include <libxml/encoding.h> +#include <libxml/xmlIO.h> +#include <libxml/uri.h> +#ifdef LIBXML_CATALOG_ENABLED +#include <libxml/catalog.h> +#endif + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + + +#define XML_PARSER_BIG_BUFFER_SIZE 300 +#define XML_PARSER_BUFFER_SIZE 100 + +#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" + +/* + * List of XML prefixed PI allowed by W3C specs + */ + +static const char *xmlW3CPIs[] = { + "xml-stylesheet", + NULL +}; + +/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ +xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, + const xmlChar **str); + +static int +xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, + xmlSAXHandlerPtr sax, + void *user_data, int depth, const xmlChar *URL, + const xmlChar *ID, xmlNodePtr *list); + +static void +xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, + xmlNodePtr lastNode); + +static int +xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, + const xmlChar *string, void *user_data, xmlNodePtr *lst); +/************************************************************************ + * * + * Parser stacks related functions and macros * + * * + 
************************************************************************/ + +xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, + const xmlChar ** str); + +/** + * inputPush: + * @ctxt: an XML parser context + * @value: the parser input + * + * Pushes a new parser input on top of the input stack + * + * Returns 0 in case of error, the index in the stack otherwise + */ +extern int +inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) +{ + if (ctxt->inputNr >= ctxt->inputMax) { + ctxt->inputMax *= 2; + ctxt->inputTab = + (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, + ctxt->inputMax * + sizeof(ctxt->inputTab[0])); + if (ctxt->inputTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); + return (0); + } + } + ctxt->inputTab[ctxt->inputNr] = value; + ctxt->input = value; + return (ctxt->inputNr++); +} +/** + * inputPop: + * @ctxt: an XML parser context + * + * Pops the top parser input from the input stack + * + * Returns the input just removed + */ +extern xmlParserInputPtr +inputPop(xmlParserCtxtPtr ctxt) +{ + xmlParserInputPtr ret; + + if (ctxt->inputNr <= 0) + return (0); + ctxt->inputNr--; + if (ctxt->inputNr > 0) + ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; + else + ctxt->input = NULL; + ret = ctxt->inputTab[ctxt->inputNr]; + ctxt->inputTab[ctxt->inputNr] = 0; + return (ret); +} +/** + * nodePush: + * @ctxt: an XML parser context + * @value: the element node + * + * Pushes a new element node on top of the node stack + * + * Returns 0 in case of error, the index in the stack otherwise + */ +extern int +nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) +{ + if (ctxt->nodeNr >= ctxt->nodeMax) { + ctxt->nodeMax *= 2; + ctxt->nodeTab = + (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, + ctxt->nodeMax * + sizeof(ctxt->nodeTab[0])); + if (ctxt->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); + return (0); + } + } + ctxt->nodeTab[ctxt->nodeNr] = value; + ctxt->node = value; + return 
(ctxt->nodeNr++); +} +/** + * nodePop: + * @ctxt: an XML parser context + * + * Pops the top element node from the node stack + * + * Returns the node just removed + */ +extern xmlNodePtr +nodePop(xmlParserCtxtPtr ctxt) +{ + xmlNodePtr ret; + + if (ctxt->nodeNr <= 0) + return (0); + ctxt->nodeNr--; + if (ctxt->nodeNr > 0) + ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; + else + ctxt->node = NULL; + ret = ctxt->nodeTab[ctxt->nodeNr]; + ctxt->nodeTab[ctxt->nodeNr] = 0; + return (ret); +} +/** + * namePush: + * @ctxt: an XML parser context + * @value: the element name + * + * Pushes a new element name on top of the name stack + * + * Returns 0 in case of error, the index in the stack otherwise + */ +extern int +namePush(xmlParserCtxtPtr ctxt, xmlChar * value) +{ + if (ctxt->nameNr >= ctxt->nameMax) { + ctxt->nameMax *= 2; + ctxt->nameTab = + (xmlChar * *)xmlRealloc(ctxt->nameTab, + ctxt->nameMax * + sizeof(ctxt->nameTab[0])); + if (ctxt->nameTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); + return (0); + } + } + ctxt->nameTab[ctxt->nameNr] = value; + ctxt->name = value; + return (ctxt->nameNr++); +} +/** + * namePop: + * @ctxt: an XML parser context + * + * Pops the top element name from the name stack + * + * Returns the name just removed + */ +extern xmlChar * +namePop(xmlParserCtxtPtr ctxt) +{ + xmlChar *ret; + + if (ctxt->nameNr <= 0) + return (0); + ctxt->nameNr--; + if (ctxt->nameNr > 0) + ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; + else + ctxt->name = NULL; + ret = ctxt->nameTab[ctxt->nameNr]; + ctxt->nameTab[ctxt->nameNr] = 0; + return (ret); +} + +static int spacePush(xmlParserCtxtPtr ctxt, int val) { + if (ctxt->spaceNr >= ctxt->spaceMax) { + ctxt->spaceMax *= 2; + ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, + ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); + if (ctxt->spaceTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return(0); + } + } + ctxt->spaceTab[ctxt->spaceNr] = val; + 
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; + return(ctxt->spaceNr++); +} + +static int spacePop(xmlParserCtxtPtr ctxt) { + int ret; + if (ctxt->spaceNr <= 0) return(0); + ctxt->spaceNr--; + if (ctxt->spaceNr > 0) + ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; + else + ctxt->space = NULL; + ret = ctxt->spaceTab[ctxt->spaceNr]; + ctxt->spaceTab[ctxt->spaceNr] = -1; + return(ret); +} + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one often need to make assumption on the context to + * use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * To be used with extreme caution since operations consuming + * characters may move the input buffer to a different location ! + * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * RAW same as CUR but in the input buffer, bypass any token + * extraction that may have been done + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * + * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding + * + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. + * NEXTL(l) Skip l xmlChar in the input buffer + * CUR_CHAR(l) returns the current unicode character (int), set l + * to the number of xmlChars used for the encoding [0-5]. 
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/*
 * All of the macros below expect a variable named `ctxt` (the parser
 * context) to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances both the character counter and the input cursor by val.
 * If the cursor lands on '%' the PE reference handler is invoked; if it
 * lands on a NUL and the input cannot be grown, the input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK calls xmlSHRINK once the cursor is more than INPUT_CHUNK past the
 * buffer base.  NOTE: the expansion is a bare `if` statement that already
 * ends with a semicolon, so it is not safe as the body of an if/else.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK:
 * Shrinks the current input buffer, then pops the input if the cursor sits
 * on a NUL and growing the buffer yields nothing more.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW calls xmlGROW when fewer than INPUT_CHUNK bytes remain between the
 * cursor and the end of the buffer.  NOTE: like SHRINK, the expansion is a
 * bare `if` statement that already ends with a semicolon.
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
	xmlGROW (ctxt);

/*
 * xmlGROW:
 * Grows the current input buffer, then pops the input if the cursor still
 * sits on a NUL and a second grow attempt yields nothing more.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 steps the cursor and the character counter by one and tries to
 * grow the input when the new position holds a NUL.  It expands to a
 * braced block, not a do/while(0).
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL updates line/column from the character under the cursor (a '\n'
 * starts a new line at column 1, anything else bumps the column), then
 * moves the cursor l xmlChars forward and runs the PE reference handler
 * if it lands on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF stores v at b[i] and advances i: a single xmlChar when l is 1,
 * otherwise whatever xmlCopyCharMultiByte writes.  NOTE: the expansion is
 * an unbraced if/else without a trailing semicolon and performs no
 * capacity check; callers must guarantee room in b.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 *
	 * This path works on a local cursor and only writes it back to the
	 * input when the buffer must be refilled or when the loop ends.
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK(*cur)) {
	    /* Only newlines update position info here; col is not advanced
	     * for other blanks on this path. */
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/* Publish the cursor before growing, then reload it since
		 * the grow call may relocate the buffer. */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	/* General path: stacked inputs or DTD state, where exhausted
	 * inputs may need popping and '%' may start a PE reference. */
	int cur;
	do {
	    cur = CUR;
	    while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
		NEXT;
		cur = CUR;
		res++;
	    }
	    /* At the end of a nested input, drop back to the enclosing one
	     * (never while inside a comment). */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	    /* NOTE(review): the loop condition tests the value of cur read
	     * before the PE reference handler ran, not the character now
	     * under the cursor - confirm this is intended. */
	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
+ * + * Returns the current xmlChar in the parser context + */ +xmlChar +xmlPopInput(xmlParserCtxtPtr ctxt) { + if (ctxt->inputNr == 1) return(0); /* End of main Input */ + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "Popping input %d\n", ctxt->inputNr); + xmlFreeInputStream(inputPop(ctxt)); + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) + return(xmlPopInput(ctxt)); + return(CUR); +} + +/** + * xmlPushInput: + * @ctxt: an XML parser context + * @input: an XML parser input fragment (entity, XML fragment ...). + * + * xmlPushInput: switch to a new input stream which is stacked on top + * of the previous one(s). + */ +void +xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { + if (input == NULL) return; + + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); + } + inputPush(ctxt, input); + GROW; +} + +/** + * xmlParseCharRef: + * @ctxt: an XML parser context + * + * parse Reference declarations + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. 
+ * + * Returns the value parsed (as an int), 0 in case of error + */ +int +xmlParseCharRef(xmlParserCtxtPtr ctxt) { + unsigned int val = 0; + int count = 0; + + /* + * Using RAW/CUR/NEXT is okay since we are working on ASCII range here + */ + if ((RAW == '&') && (NXT(1) == '#') && + (NXT(2) == 'x')) { + SKIP(3); + GROW; + while (RAW != ';') { /* loop blocked by count */ + if (count++ > 20) { + count = 0; + GROW; + } + if ((RAW >= '0') && (RAW <= '9')) + val = val * 16 + (CUR - '0'); + else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) + val = val * 16 + (CUR - 'a') + 10; + else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) + val = val * 16 + (CUR - 'A') + 10; + else { + ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid hexadecimal value\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + val = 0; + break; + } + NEXT; + count++; + } + if (RAW == ';') { + /* on purpose to avoid reentrancy problems with NEXT and SKIP */ + ctxt->nbChars ++; + ctxt->input->cur++; + } + } else if ((RAW == '&') && (NXT(1) == '#')) { + SKIP(2); + GROW; + while (RAW != ';') { /* loop blocked by count */ + if (count++ > 20) { + count = 0; + GROW; + } + if ((RAW >= '0') && (RAW <= '9')) + val = val * 10 + (CUR - '0'); + else { + ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid decimal value\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + val = 0; + break; + } + NEXT; + count++; + } + if (RAW == ';') { + /* on purpose to avoid reentrancy problems with NEXT and SKIP */ + ctxt->nbChars ++; + ctxt->input->cur++; + } + } else { + ctxt->errNo = XML_ERR_INVALID_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid value\n"); + 
ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + */ + if (IS_CHAR(val)) { + return(val); + } else { + ctxt->errNo = XML_ERR_INVALID_CHAR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid xmlChar value %d\n", + val); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + return(0); +} + +/** + * xmlParseStringCharRef: + * @ctxt: an XML parser context + * @str: a pointer to an index in the string + * + * parse Reference declarations, variant parsing from a string rather + * than an an input flow. + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + * + * Returns the value parsed (as an int), 0 in case of error, str will be + * updated to the current value of the index + */ +static int +xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { + const xmlChar *ptr; + xmlChar cur; + int val = 0; + + if ((str == NULL) || (*str == NULL)) return(0); + ptr = *str; + cur = *ptr; + if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { + ptr += 3; + cur = *ptr; + while (cur != ';') { /* Non input consuming loop */ + if ((cur >= '0') && (cur <= '9')) + val = val * 16 + (cur - '0'); + else if ((cur >= 'a') && (cur <= 'f')) + val = val * 16 + (cur - 'a') + 10; + else if ((cur >= 'A') && (cur <= 'F')) + val = val * 16 + (cur - 'A') + 10; + else { + ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringCharRef: invalid hexadecimal value\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + val = 0; + break; + } + ptr++; + cur = *ptr; + } + if (cur == 
';') + ptr++; + } else if ((cur == '&') && (ptr[1] == '#')){ + ptr += 2; + cur = *ptr; + while (cur != ';') { /* Non input consuming loops */ + if ((cur >= '0') && (cur <= '9')) + val = val * 10 + (cur - '0'); + else { + ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringCharRef: invalid decimal value\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + val = 0; + break; + } + ptr++; + cur = *ptr; + } + if (cur == ';') + ptr++; + } else { + ctxt->errNo = XML_ERR_INVALID_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringCharRef: invalid value\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(0); + } + *str = ptr; + + /* + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + */ + if (IS_CHAR(val)) { + return(val); + } else { + ctxt->errNo = XML_ERR_INVALID_CHAR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringCharRef: invalid xmlChar value %d\n", val); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + return(0); +} + +/** + * xmlNewBlanksWrapperInputStream: + * @ctxt: an XML parser context + * @entity: an Entity pointer + * + * Create a new input stream for wrapping + * blanks around a PEReference + * + * Returns the new input stream or NULL + */ + +static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} + +static xmlParserInputPtr +xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { + xmlParserInputPtr input; + xmlChar *buffer; + size_t length; + if (entity == NULL) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "internal: xmlNewBlanksWrapperInputStream entity 
= NULL\n"); + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + return(NULL); + } + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "new blanks wrapper for entity: %s\n", entity->name); + input = xmlNewInputStream(ctxt); + if (input == NULL) { + return(NULL); + } + length = xmlStrlen(entity->name) + 5; + buffer = xmlMalloc(length); + if (buffer == NULL) { + return(NULL); + } + buffer [0] = ' '; + buffer [1] = '%'; + buffer [length-3] = ';'; + buffer [length-2] = ' '; + buffer [length-1] = 0; + memcpy(buffer + 2, entity->name, length - 5); + input->free = deallocblankswrapper; + input->base = buffer; + input->cur = buffer; + input->length = length; + input->end = &buffer[length]; + return(input); +} + +/** + * xmlParserHandlePEReference: + * @ctxt: the parser context + * + * [69] PEReference ::= '%' Name ';' + * + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive + * reference to itself, either directly or indirectly. + * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", ... ... The declaration of a parameter + * entity must precede any reference to it... + * + * [ VC: Entity Declared ] + * In a document with an external subset or external parameter entities + * with "standalone='no'", ... ... The declaration of a parameter entity + * must precede any reference to it... + * + * [ WFC: In DTD ] + * Parameter-entity references may only appear in the DTD. + * NOTE: misleading but this is handled. + * + * A PEReference may have been detected in the current input stream + * the handling is done accordingly to + * http://www.w3.org/TR/REC-xml#entproc + * i.e. 
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * First decide from the parser state whether a '%' at this point is a
     * reference to expand, something to leave alone, or an error.  Only
     * the XML_PARSER_DTD case can fall out of the switch.
     */
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
	case XML_PARSER_START_TAG:
	    return;
	case XML_PARSER_END_TAG:
	    return;
        case XML_PARSER_EOF:
	    ctxt->errNo = XML_ERR_PEREF_AT_EOF;
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	        ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
	    ctxt->wellFormed = 0;
	    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
	    return;
        case XML_PARSER_PROLOG:
	case XML_PARSER_START:
	case XML_PARSER_MISC:
	    ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	        ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
	    ctxt->wellFormed = 0;
	    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
	    return;
	case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
	case XML_PARSER_SYSTEM_LITERAL:
	case XML_PARSER_PUBLIC_LITERAL:
	    /* we just ignore it there */
	    return;
        case XML_PARSER_EPILOG:
	    ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	        ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
	    ctxt->wellFormed = 0;
	    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlStringDecodeEntities
	     */
	    return;
        case XML_PARSER_DTD:
	    /*
	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
	     * In the internal DTD subset, parameter-entity references
	     * can occur only where markup declarations can occur, not
	     * within markup declarations.
	     * In that case this is handled in xmlParseMarkupDecl
	     */
	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
		return;
	    /* A '%' followed by a blank or by the end of the buffer is not
	     * treated as the start of a reference. */
	    if (IS_BLANK(NXT(1)) || NXT(1) == 0)
		return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* Consume the '%' and read the entity name that follows it. */
    NEXT;
    name = xmlParseName(ctxt);
    /* NOTE(review): name may still be NULL when it is printed here; the
     * NULL check only happens below. */
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"PEReference: %s\n", name);
    if (name == NULL) {
        ctxt->errNo = XML_ERR_PEREF_NO_NAME;
	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    } else {
	if (RAW == ';') {
	    NEXT;
	    /* Resolve the name through the SAX getParameterEntity hook. */
	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
	    if (entity == NULL) {

		/*
		 * [ WFC: Entity Declared ]
		 * In a document without any DTD, a document with only an
		 * internal DTD subset which contains no parameter entity
		 * references, or a document with "standalone='yes'", ...
		 * ... The declaration of a parameter entity must precede
		 * any reference to it...
		 */
		if ((ctxt->standalone == 1) ||
		    ((ctxt->hasExternalSubset == 0) &&
		     (ctxt->hasPErefs == 0))) {
		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
			ctxt->sax->error(ctxt->userData,
			 "PEReference: %%%s; not found\n", name);
		    ctxt->wellFormed = 0;
		    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
	        } else {
		    /*
		     * [ VC: Entity Declared ]
		     * In a document with an external subset or external
		     * parameter entities with "standalone='no'", ...
		     * ... The declaration of a parameter entity must precede
		     * any reference to it...
		     */
		    if ((!ctxt->disableSAX) &&
			(ctxt->validate) && (ctxt->vctxt.error != NULL)) {
			ctxt->vctxt.error(ctxt->vctxt.userData,
			     "PEReference: %%%s; not found\n", name);
		    } else if ((!ctxt->disableSAX) &&
			(ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
			ctxt->sax->warning(ctxt->userData,
			 "PEReference: %%%s; not found\n", name);
		    ctxt->valid = 0;
		}
	    } else if (ctxt->input->free != deallocblankswrapper) {
		    /*
		     * The current input is not a blanks wrapper: push one
		     * for this entity instead of the entity content.
		     * Presumably the reference is then met again while the
		     * wrapper is being read, which takes the branch below.
		     */
		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);
	    } else {
		/* Reading from a blanks wrapper: push the entity itself. */
	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
		    xmlChar start[4];
		    xmlCharEncoding enc;

		    /*
		     * handle the extra spaces added before and after
		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
		     * this is done independently.
		     */
		    input = xmlNewEntityInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);

		    /*
		     * Get the 4 first bytes and decode the charset
		     * if enc != XML_CHAR_ENCODING_NONE
		     * plug some encoding conversion routines.
		     */
		    /* GROW already expands to a complete statement, hence
		     * no semicolon here. */
		    GROW
		    if (entity->length >= 4) {
			start[0] = RAW;
			start[1] = NXT(1);
			start[2] = NXT(2);
			start[3] = NXT(3);
			enc = xmlDetectCharEncoding(start, 4);
			if (enc != XML_CHAR_ENCODING_NONE) {
			    xmlSwitchEncoding(ctxt, enc);
			}
		    }

		    /* An external entity starting with "<?xml" followed by
		     * a blank carries a text declaration to parse. */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
			(RAW == '<') && (NXT(1) == '?') &&
			(NXT(2) == 'x') && (NXT(3) == 'm') &&
			(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
			xmlParseTextDecl(ctxt);
		    }
		} else {
		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
			ctxt->sax->error(ctxt->userData,
			 "xmlParserHandlePEReference: %s is not a parameter entity\n",
			                 name);
		    ctxt->wellFormed = 0;
		    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
		}
	    }
	} else {
	    ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
		ctxt->sax->error(ctxt->userData,
				 "xmlParserHandlePEReference: expecting ';'\n");
	    ctxt->wellFormed = 0;
	    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
	}
	/* name was allocated by xmlParseName and is owned here. */
	xmlFree(name);
    }
}

/*
 * Macro used to grow the current buffer.
 *
 * Expects an int variable named <buffer>_size next to the buffer itself.
 * NOTE: on allocation failure the macro returns NULL from the enclosing
 * function; because the realloc result is assigned straight to the buffer
 * variable, the previous allocation is no longer reachable at that point.
 */
#define growBuffer(buffer) {						\
    buffer##_size *= 2;							\
    buffer = (xmlChar *)						\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer == NULL) {						\
	xmlGenericError(xmlGenericErrorContext,	"realloc failed");	\
	return(NULL);							\
    }									\
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
+ */ +xmlChar * +xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, + xmlChar end, xmlChar end2, xmlChar end3) { + xmlChar *buffer = NULL; + int buffer_size = 0; + + xmlChar *current = NULL; + xmlEntityPtr ent; + int c,l; + int nbchars = 0; + + if (str == NULL) + return(NULL); + + if (ctxt->depth > 40) { + ctxt->errNo = XML_ERR_ENTITY_LOOP; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Detected entity reference loop\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + + /* + * allocate a translation buffer. + */ + buffer_size = XML_PARSER_BIG_BUFFER_SIZE; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlStringDecodeEntities: malloc failed"); + return(NULL); + } + + /* + * OK loop until we reach one of the ending char or a size limit. + * we are operating on already parsed values. + */ + c = CUR_SCHAR(str, l); + while ((c != 0) && (c != end) && /* non input consuming loop */ + (c != end2) && (c != end3)) { + + if (c == 0) break; + if ((c == '&') && (str[1] == '#')) { + int val = xmlParseStringCharRef(ctxt, &str); + if (val != 0) { + COPY_BUF(0,buffer,nbchars,val); + } + } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "String decoding Entity Reference: %.30s\n", + str); + ent = xmlParseStringEntityRef(ctxt, &str); + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (ent->content != NULL) { + COPY_BUF(0,buffer,nbchars,ent->content[0]); + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "internal error entity has no content\n"); + } + } else if ((ent != NULL) && (ent->content != NULL)) { + xmlChar *rep; + + ctxt->depth++; + rep = xmlStringDecodeEntities(ctxt, ent->content, what, + 0, 0, 0); + 
ctxt->depth--; + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars > + buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + xmlFree(rep); + } + } else if (ent != NULL) { + int i = xmlStrlen(ent->name); + const xmlChar *cur = ent->name; + + buffer[nbchars++] = '&'; + if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + for (;i > 0;i--) + buffer[nbchars++] = *cur++; + buffer[nbchars++] = ';'; + } + } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "String decoding PE Reference: %.30s\n", str); + ent = xmlParseStringPEReference(ctxt, &str); + if (ent != NULL) { + xmlChar *rep; + + ctxt->depth++; + rep = xmlStringDecodeEntities(ctxt, ent->content, what, + 0, 0, 0); + ctxt->depth--; + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars > + buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + xmlFree(rep); + } + } + } else { + COPY_BUF(l,buffer,nbchars,c); + str += l; + if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + c = CUR_SCHAR(str, l); + } + buffer[nbchars++] = 0; + return(buffer); +} + + +/************************************************************************ + * * + * Commodity functions to handle xmlChars * + * * + ************************************************************************/ + +/** + * xmlStrndup: + * @cur: the input xmlChar * + * @len: the len of @cur + * + * a strndup for array of xmlChar's + * + * Returns a new xmlChar * or NULL + */ +xmlChar * +xmlStrndup(const xmlChar *cur, int len) { + xmlChar *ret; + + if ((cur == NULL) || (len < 0)) return(NULL); + ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of 
%ld byte failed\n", + (len + 1) * (long)sizeof(xmlChar)); + return(NULL); + } + memcpy(ret, cur, len * sizeof(xmlChar)); + ret[len] = 0; + return(ret); +} + +/** + * xmlStrdup: + * @cur: the input xmlChar * + * + * a strdup for array of xmlChar's. Since they are supposed to be + * encoded in UTF-8 or an encoding with 8bit based chars, we assume + * a termination mark of '0'. + * + * Returns a new xmlChar * or NULL + */ +xmlChar * +xmlStrdup(const xmlChar *cur) { + const xmlChar *p = cur; + + if (cur == NULL) return(NULL); + while (*p != 0) p++; /* non input consuming */ + return(xmlStrndup(cur, p - cur)); +} + +/** + * xmlCharStrndup: + * @cur: the input char * + * @len: the len of @cur + * + * a strndup for char's to xmlChar's + * + * Returns a new xmlChar * or NULL + */ + +xmlChar * +xmlCharStrndup(const char *cur, int len) { + int i; + xmlChar *ret; + + if ((cur == NULL) || (len < 0)) return(NULL); + ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", + (len + 1) * (long)sizeof(xmlChar)); + return(NULL); + } + for (i = 0;i < len;i++) + ret[i] = (xmlChar) cur[i]; + ret[len] = 0; + return(ret); +} + +/** + * xmlCharStrdup: + * @cur: the input char * + * + * a strdup for char's to xmlChar's + * + * Returns a new xmlChar * or NULL + */ + +xmlChar * +xmlCharStrdup(const char *cur) { + const char *p = cur; + + if (cur == NULL) return(NULL); + while (*p != '\0') p++; /* non input consuming */ + return(xmlCharStrndup(cur, p - cur)); +} + +/** + * xmlStrcmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * + * a strcmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { + register int tmp; + + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = *str1++ - *str2; + if (tmp != 0) return(tmp); + } while (*str2++ != 0); + 
return 0; +} + +/** + * xmlStrEqual: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * + * Check if both string are equal of have same content + * Should be a bit more readable and faster than xmlStrEqual() + * + * Returns 1 if they are equal, 0 if they are different + */ + +int +xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { + if (str1 == str2) return(1); + if (str1 == NULL) return(0); + if (str2 == NULL) return(0); + do { + if (*str1++ != *str2) return(0); + } while (*str2++); + return(1); +} + +/** + * xmlStrncmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * @len: the max comparison length + * + * a strncmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { + register int tmp; + + if (len <= 0) return(0); + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = *str1++ - *str2; + if (tmp != 0 || --len == 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +static const xmlChar casemap[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, + 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, + 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, + 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, + 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, + 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, + 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, + 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, + 
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, + 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, + 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, + 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, + 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, + 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, + 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, + 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, + 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, + 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, + 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, + 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +}; + +/** + * xmlStrcasecmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * + * a strcasecmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { + register int tmp; + + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = casemap[*str1++] - casemap[*str2]; + if (tmp != 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +/** + * xmlStrncasecmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * @len: the max comparison length + * + * a strncasecmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { + register int tmp; + + if (len <= 0) return(0); + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = casemap[*str1++] - casemap[*str2]; + if (tmp != 0 || --len == 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +/** + * xmlStrchr: + * @str: the xmlChar * array + * @val: the xmlChar to search + * + * a strchr for xmlChar's + * + * Returns the xmlChar * for the first occurrence or NULL. 
+ */ + +const xmlChar * +xmlStrchr(const xmlChar *str, xmlChar val) { + if (str == NULL) return(NULL); + while (*str != 0) { /* non input consuming */ + if (*str == val) return((xmlChar *) str); + str++; + } + return(NULL); +} + +/** + * xmlStrstr: + * @str: the xmlChar * array (haystack) + * @val: the xmlChar to search (needle) + * + * a strstr for xmlChar's + * + * Returns the xmlChar * for the first occurrence or NULL. + */ + +const xmlChar * +xmlStrstr(const xmlChar *str, const xmlChar *val) { + int n; + + if (str == NULL) return(NULL); + if (val == NULL) return(NULL); + n = xmlStrlen(val); + + if (n == 0) return(str); + while (*str != 0) { /* non input consuming */ + if (*str == *val) { + if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); + } + str++; + } + return(NULL); +} + +/** + * xmlStrcasestr: + * @str: the xmlChar * array (haystack) + * @val: the xmlChar to search (needle) + * + * a case-ignoring strstr for xmlChar's + * + * Returns the xmlChar * for the first occurrence or NULL. + */ + +const xmlChar * +xmlStrcasestr(const xmlChar *str, xmlChar *val) { + int n; + + if (str == NULL) return(NULL); + if (val == NULL) return(NULL); + n = xmlStrlen(val); + + if (n == 0) return(str); + while (*str != 0) { /* non input consuming */ + if (casemap[*str] == casemap[*val]) + if (!xmlStrncasecmp(str, val, n)) return(str); + str++; + } + return(NULL); +} + +/** + * xmlStrsub: + * @str: the xmlChar * array (haystack) + * @start: the index of the first char (zero based) + * @len: the length of the substring + * + * Extract a substring of a given string + * + * Returns the xmlChar * for the first occurrence or NULL. 
+ */ + +xmlChar * +xmlStrsub(const xmlChar *str, int start, int len) { + int i; + + if (str == NULL) return(NULL); + if (start < 0) return(NULL); + if (len < 0) return(NULL); + + for (i = 0;i < start;i++) { + if (*str == 0) return(NULL); + str++; + } + if (*str == 0) return(NULL); + return(xmlStrndup(str, len)); +} + +/** + * xmlStrlen: + * @str: the xmlChar * array + * + * length of a xmlChar's string + * + * Returns the number of xmlChar contained in the ARRAY. + */ + +int +xmlStrlen(const xmlChar *str) { + int len = 0; + + if (str == NULL) return(0); + while (*str != 0) { /* non input consuming */ + str++; + len++; + } + return(len); +} + +/** + * xmlStrncat: + * @cur: the original xmlChar * array + * @add: the xmlChar * array added + * @len: the length of @add + * + * a strncat for array of xmlChar's, it will extend @cur with the len + * first bytes of @add. + * + * Returns a new xmlChar *, the original @cur is reallocated if needed + * and should not be freed + */ + +xmlChar * +xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { + int size; + xmlChar *ret; + + if ((add == NULL) || (len == 0)) + return(cur); + if (cur == NULL) + return(xmlStrndup(add, len)); + + size = xmlStrlen(cur); + ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlStrncat: realloc of %ld byte failed\n", + (size + len + 1) * (long)sizeof(xmlChar)); + return(cur); + } + memcpy(&ret[size], add, len * sizeof(xmlChar)); + ret[size + len] = 0; + return(ret); +} + +/** + * xmlStrcat: + * @cur: the original xmlChar * array + * @add: the xmlChar * array added + * + * a strcat for array of xmlChar's. Since they are supposed to be + * encoded in UTF-8 or an encoding with 8bit based chars, we assume + * a termination mark of '0'. + * + * Returns a new xmlChar * containing the concatenated string. 
+ */ +xmlChar * +xmlStrcat(xmlChar *cur, const xmlChar *add) { + const xmlChar *p = add; + + if (add == NULL) return(cur); + if (cur == NULL) + return(xmlStrdup(add)); + + while (*p != 0) p++; /* non input consuming */ + return(xmlStrncat(cur, add, p - add)); +} + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? * + * * + ************************************************************************/ + +/** + * areBlanks: + * @ctxt: an XML parser context + * @str: a xmlChar * + * @len: the size of @str + * + * Is this a sequence of blank chars that one can ignore ? + * + * Returns 1 if ignorable 0 otherwise. + */ + +static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { + int i, ret; + xmlNodePtr lastChild; + + /* + * Don't spend time trying to differentiate them, the same callback is + * used ! + */ + if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) + return(0); + + /* + * Check for xml:space value. 
+ */ + if (*(ctxt->space) == 1) + return(0); + + /* + * Check that the string is made of blanks + */ + for (i = 0;i < len;i++) + if (!(IS_BLANK(str[i]))) return(0); + + /* + * Look if the element is mixed content in the DTD if available + */ + if (ctxt->node == NULL) return(0); + if (ctxt->myDoc != NULL) { + ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); + if (ret == 0) return(1); + if (ret == 1) return(0); + } + + /* + * Otherwise, heuristic :-\ + */ + if (RAW != '<') return(0); + if ((ctxt->node->children == NULL) && + (RAW == '<') && (NXT(1) == '/')) return(0); + + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if ((ctxt->node->type != XML_ELEMENT_NODE) && + (ctxt->node->content != NULL)) return(0); + } else if (xmlNodeIsText(lastChild)) + return(0); + else if ((ctxt->node->children != NULL) && + (xmlNodeIsText(ctxt->node->children))) + return(0); + return(1); +} + +/************************************************************************ + * * + * Extra stuff for namespace support * + * Relates to http://www.w3.org/TR/WD-xml-names * + * * + ************************************************************************/ + +/** + * xmlSplitQName: + * @ctxt: an XML parser context + * @name: an XML parser context + * @prefix: a xmlChar ** + * + * parse an UTF8 encoded XML qualified name string + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns the local part, and prefix is updated + * to get the Prefix if any. 
+ */ + +xmlChar * +xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + xmlChar *buffer = NULL; + int len = 0; + int max = XML_MAX_NAMELEN; + xmlChar *ret = NULL; + const xmlChar *cur = name; + int c; + + *prefix = NULL; + +#ifndef XML_XML_NAMESPACE + /* xml: prefix is not really a namespace */ + if ((cur[0] == 'x') && (cur[1] == 'm') && + (cur[2] == 'l') && (cur[3] == ':')) + return(xmlStrdup(name)); +#endif + + /* nasty but valid */ + if (cur[0] == ':') + return(xmlStrdup(name)); + + c = *cur++; + while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((c != 0) && (c != ':')) { /* tested bigname.xml */ + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + buffer = NULL; + max = XML_MAX_NAMELEN; + } + + + if (c == ':') { + c = *cur; + if (c == 0) return(ret); + *prefix = ret; + len = 0; + + /* + * Check that the first character is proper to start + * a new name + */ + if (!(((c >= 0x61) && (c <= 0x7A)) || + ((c >= 0x41) && (c <= 0x5A)) || + (c == '_') || (c == ':'))) { + int l; + int first = CUR_SCHAR(cur, l); + + if (!IS_LETTER(first) && (first != '_')) { + if ((ctxt->sax != NULL) 
&& (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Name %s is not XML Namespace compliant\n", + name); + } + } + cur++; + + while ((c != 0) && (len < max)) { /* tested bigname2.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while (c != 0) { /* tested bigname2.xml */ + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + } + } + + return(ret); +} + +/************************************************************************ + * * + * The parser itself * + * Relates to http://www.w3.org/TR/REC-xml * + * * + ************************************************************************/ + +static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); +/** + * xmlParseName: + * @ctxt: an XML parser context + * + * parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL + */ + +xmlChar * +xmlParseName(xmlParserCtxtPtr ctxt) { + const xmlChar *in; + xmlChar *ret; + int count = 0; + + GROW; + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->input->cur; + if (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_') || (*in == ':')) { + in++; + while (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == ':') || (*in == '.')) + in++; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->input->cur; + ret = xmlStrndup(ctxt->input->cur, count); + ctxt->input->cur = in; + return(ret); + } + } + return(xmlParseNameComplex(ctxt)); +} + +/** + * xmlParseNameAndCompare: + * @ctxt: an XML parser context + * + * parse an XML name and compares for match + * (specialized for endtag parsing) + * + * + * Returns NULL for an illegal name, (xmlChar*) 1 for success + * and the name for mismatch + */ + +static xmlChar * +xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { + const xmlChar *cmp = other; + const xmlChar *in; + xmlChar *ret; + + GROW; + + in = ctxt->input->cur; + while (*in != 0 && *in == *cmp) { + ++in; + ++cmp; + } + if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) { + /* success */ + ctxt->input->cur = in; + return (xmlChar*) 1; + } + /* failure (or end of input buffer), check with full function */ + ret = xmlParseName (ctxt); + if (ret != 0 && xmlStrEqual (ret, other)) { + xmlFree (ret); + return (xmlChar*) 1; + } + return ret; +} + +static xmlChar * +xmlParseNameComplex(xmlParserCtxtPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + int count = 0; + + /* + * Handler for more complex cases + */ + GROW; + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || 
/* accelerators */ + (!IS_LETTER(c) && (c != '_') && + (c != ':'))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + if (count++ > 100) { + count = 0; + GROW; + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + if (len >= XML_MAX_NAMELEN) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNameComplex: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNameComplex: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + buffer[len] = 0; + return(buffer); + } + } + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseStringName: + * @ctxt: an XML parser context + * @str: a pointer to the string pointer (IN/OUT) + * + * parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL. The @str pointer + * is updated to the current location in the string. 
+ */ + +static xmlChar * +xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + const xmlChar *cur = *str; + int len = 0, l; + int c; + + c = CUR_SCHAR(cur, l); + if (!IS_LETTER(c) && (c != '_') && + (c != ':')) { + return(NULL); + } + + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + } + buffer[len] = 0; + *str = cur; + return(buffer); + } + } + *str = cur; + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseNmtoken: + * @ctxt: an XML parser context + * + * parse an XML Nmtoken. 
+ * + * [7] Nmtoken ::= (NameChar)+ + * + * [8] Nmtokens ::= Nmtoken (S Nmtoken)* + * + * Returns the Nmtoken parsed or NULL + */ + +xmlChar * +xmlParseNmtoken(xmlParserCtxtPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + int count = 0; + + GROW; + c = CUR_CHAR(l); + + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + if (len >= XML_MAX_NAMELEN) { + /* + * Okay someone managed to make a huge token, so he's ready to pay + * for the processing speed. + */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNmtoken: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNmtoken: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + buffer[len] = 0; + return(buffer); + } + } + if (len == 0) + return(NULL); + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseEntityValue: + * @ctxt: an XML parser context + * @orig: if non-NULL store a copy of the original entity value + * + * parse a value for ENTITY declarations + * + * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | + * "'" ([^%&'] | PEReference | Reference)* "'" + * + * 
Returns the EntityValue parsed with reference substituted or NULL + */ + +xmlChar * +xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int c, l; + xmlChar stop; + xmlChar *ret = NULL; + const xmlChar *cur = NULL; + xmlParserInputPtr input; + + if (RAW == '"') stop = '"'; + else if (RAW == '\'') stop = '\''; + else { + ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + + /* + * The content of the entity definition is copied in a buffer. + */ + + ctxt->instate = XML_PARSER_ENTITY_VALUE; + input = ctxt->input; + GROW; + NEXT; + c = CUR_CHAR(l); + /* + * NOTE: 4.4.5 Included in Literal + * When a parameter entity reference appears in a literal entity + * value, ... a single or double quote character in the replacement + * text is always treated as a normal data character and will not + * terminate the literal. + * In practice it means we stop the loop only when back at parsing + * the initial entity and the quote is found + */ + while ((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + /* + * Pop-up of finished entities. 
+ */ + while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ + xmlPopInput(ctxt); + + GROW; + c = CUR_CHAR(l); + if (c == 0) { + GROW; + c = CUR_CHAR(l); + } + } + buf[len] = 0; + + /* + * Raise problem w.r.t. '&' and '%' being used in non-entities + * reference constructs. Note Charref will be handled in + * xmlStringDecodeEntities() + */ + cur = buf; + while (*cur != 0) { /* non input consuming */ + if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { + xmlChar *name; + xmlChar tmp = *cur; + + cur++; + name = xmlParseStringName(ctxt, &cur); + if ((name == NULL) || (*cur != ';')) { + ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "EntityValue: '%c' forbidden except for entities references\n", + tmp); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if ((tmp == '%') && (ctxt->inSubset == 1) && + (ctxt->inputNr == 1)) { + ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "EntityValue: PEReferences forbidden in internal subset\n", + tmp); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (name != NULL) + xmlFree(name); + } + cur++; + } + + /* + * Then PEReference entities are substituted. + */ + if (c != stop) { + ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + xmlFree(buf); + } else { + NEXT; + /* + * NOTE: 4.4.7 Bypassed + * When a general entity reference appears in the EntityValue in + * an entity declaration, it is bypassed and left as is. + * so XML_SUBSTITUTE_REF is not set here. 
+ */ + ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, + 0, 0, 0); + if (orig != NULL) + *orig = buf; + else + xmlFree(buf); + } + + return(ret); +} + +/** + * xmlParseAttValue: + * @ctxt: an XML parser context + * + * parse a value for an attribute + * Note: the parser won't do substitution of entities here, this + * will be handled later in xmlStringGetNodeList + * + * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | + * "'" ([^<&'] | Reference)* "'" + * + * 3.3.3 Attribute-Value Normalization: + * Before the value of an attribute is passed to the application or + * checked for validity, the XML processor must normalize it as follows: + * - a character reference is processed by appending the referenced + * character to the attribute value + * - an entity reference is processed by recursively processing the + * replacement text of the entity + * - a whitespace character (#x20, #xD, #xA, #x9) is processed by + * appending #x20 to the normalized value, except that only a single + * #x20 is appended for a "#xD#xA" sequence that is part of an external + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value + * If the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by a single space (#x20) character. + * All attributes for which no declaration has been read should be treated + * by a non-validating parser as if declared CDATA. + * + * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
+ */ + +xmlChar * +xmlParseAttValueComplex(xmlParserCtxtPtr ctxt); + +xmlChar * +xmlParseAttValue(xmlParserCtxtPtr ctxt) { + xmlChar limit = 0; + const xmlChar *in = NULL; + xmlChar *ret = NULL; + SHRINK; + GROW; + in = (xmlChar *) CUR_PTR; + if (*in != '"' && *in != '\'') { + ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + limit = *in; + ++in; + + while (*in != limit && *in >= 0x20 && *in <= 0x7f && + *in != '&' && *in != '<' + ) { + ++in; + } + if (*in != limit) { + return xmlParseAttValueComplex(ctxt); + } + ++in; + ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2); + CUR_PTR = in; + return ret; +} + +/** + * xmlParseAttValueComplex: + * @ctxt: an XML parser context + * + * parse a value for an attribute, this is the fallback function + * of xmlParseAttValue() when the attribute parsing requires handling + * of non-ASCII characters. + * + * Returns the AttValue parsed or NULL. The value has to be freed by the caller. + */ +xmlChar * +xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) { + xmlChar limit = 0; + xmlChar *buf = NULL; + int len = 0; + int buf_size = 0; + int c, l; + xmlChar *current = NULL; + xmlEntityPtr ent; + + + SHRINK; + if (NXT(0) == '"') { + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + limit = '"'; + NEXT; + } else if (NXT(0) == '\'') { + limit = '\''; + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + NEXT; + } else { + ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + + /* + * allocate a translation buffer. 
+ */ + buf_size = XML_PARSER_BUFFER_SIZE; + buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlParseAttValue: malloc failed"); + return(NULL); + } + + /* + * OK loop until we reach one of the ending char or a size limit. + */ + c = CUR_CHAR(l); + while ((NXT(0) != limit) && /* checked */ + (c != '<')) { + if (c == 0) break; + if (c == '&') { + if (NXT(1) == '#') { + int val = xmlParseCharRef(ctxt); + if (val == '&') { + if (ctxt->replaceEntities) { + if (len > buf_size - 10) { + growBuffer(buf); + } + buf[len++] = '&'; + } else { + /* + * The reparsing will be done in xmlStringGetNodeList() + * called by the attribute() function in SAX.c + */ + static xmlChar buffer[6] = "&"; + + if (len > buf_size - 10) { + growBuffer(buf); + } + current = &buffer[0]; + while (*current != 0) { /* non input consuming */ + buf[len++] = *current++; + } + } + } else { + if (len > buf_size - 10) { + growBuffer(buf); + } + len += xmlCopyChar(0, &buf[len], val); + } + } else { + ent = xmlParseEntityRef(ctxt); + if ((ent != NULL) && + (ctxt->replaceEntities != 0)) { + xmlChar *rep; + + if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming */ + buf[len++] = *current++; + if (len > buf_size - 10) { + growBuffer(buf); + } + } + xmlFree(rep); + } + } else { + if (len > buf_size - 10) { + growBuffer(buf); + } + if (ent->content != NULL) + buf[len++] = ent->content[0]; + } + } else if (ent != NULL) { + int i = xmlStrlen(ent->name); + const xmlChar *cur = ent->name; + + /* + * This may look absurd but is needed to detect + * entities problems + */ + if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && + (ent->content != NULL)) { + xmlChar *rep; + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + if (rep != NULL) + 
xmlFree(rep); + } + + /* + * Just output the reference + */ + buf[len++] = '&'; + if (len > buf_size - i - 10) { + growBuffer(buf); + } + for (;i > 0;i--) + buf[len++] = *cur++; + buf[len++] = ';'; + } + } + } else { + if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { + COPY_BUF(l,buf,len,0x20); + if (len > buf_size - 10) { + growBuffer(buf); + } + } else { + COPY_BUF(l,buf,len,c); + if (len > buf_size - 10) { + growBuffer(buf); + } + } + NEXTL(l); + } + GROW; + c = CUR_CHAR(l); + } + buf[len++] = 0; + if (RAW == '<') { + ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Unescaped '<' not allowed in attributes values\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (RAW != limit) { + ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + return(buf); +} + +/** + * xmlParseSystemLiteral: + * @ctxt: an XML parser context + * + * parse an XML Literal + * + * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") + * + * Returns the SystemLiteral parsed or NULL + */ + +xmlChar * +xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int cur, l; + xmlChar stop; + int state = ctxt->instate; + int count = 0; + + SHRINK; + if (RAW == '"') { + NEXT; + stop = '"'; + } else if (RAW == '\'') { + NEXT; + stop = '\''; + } else { + ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + 
xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + ctxt->instate = XML_PARSER_SYSTEM_LITERAL; + cur = CUR_CHAR(l); + while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } + } + count++; + if (count > 50) { + GROW; + count = 0; + } + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + GROW; + SHRINK; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + ctxt->instate = (xmlParserInputState) state; + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + NEXT; + } + return(buf); +} + +/** + * xmlParsePubidLiteral: + * @ctxt: an XML parser context + * + * parse an XML public literal + * + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + * + * Returns the PubidLiteral parsed or NULL. 
+ */ + +xmlChar * +xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + xmlChar cur; + xmlChar stop; + int count = 0; + xmlParserInputState oldstate = ctxt->instate; + + SHRINK; + if (RAW == '"') { + NEXT; + stop = '"'; + } else if (RAW == '\'') { + NEXT; + stop = '\''; + } else { + ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + ctxt->instate = XML_PARSER_PUBLIC_LITERAL; + cur = CUR; + while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ + if (len + 1 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + buf[len++] = cur; + count++; + if (count > 50) { + GROW; + count = 0; + } + NEXT; + cur = CUR; + if (cur == 0) { + GROW; + SHRINK; + cur = CUR; + } + } + buf[len] = 0; + if (cur != stop) { + ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + NEXT; + } + ctxt->instate = oldstate; + return(buf); +} + +void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); +/** + * xmlParseCharData: + * @ctxt: an XML parser context + * @cdata: int indicating whether we are within a CDATA section + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. 
+ * + * The right angle bracket (>) may be represented using the string ">", + * and must, for compatibility, be escaped using ">" or a character + * reference when it appears in the string "]]>" in content, when that + * string is not marking the end of a CDATA section. + * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +void +xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { + const xmlChar *in; + int nbchar = 0; + int line = ctxt->input->line; + int col = ctxt->input->col; + + SHRINK; + GROW; + /* + * Accelerated common case where input don't need to be + * modified before passing it to the handler. + */ + if (!cdata) { + in = ctxt->input->cur; + do { +get_more: + while (((*in >= 0x20) && (*in != '<') && (*in != ']') && + (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) + in++; + if (*in == 0xA) { + ctxt->input->line++; + in++; + while (*in == 0xA) { + ctxt->input->line++; + in++; + } + goto get_more; + } + if (*in == ']') { + if ((in[1] == ']') && (in[2] == '>')) { + ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Sequence ']]>' not allowed in content\n"); + ctxt->input->cur = in; + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + in++; + goto get_more; + } + nbchar = in - ctxt->input->cur; + if (nbchar > 0) { + if (IS_BLANK(*ctxt->input->cur)) { + const xmlChar *tmp = ctxt->input->cur; + ctxt->input->cur = in; + if (areBlanks(ctxt, tmp, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + tmp, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, + tmp, nbchar); + } + line = ctxt->input->line; + col = ctxt->input->col; + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, + ctxt->input->cur, nbchar); + line = ctxt->input->line; + col = ctxt->input->col; + } + } + ctxt->input->cur = in; + 
if (*in == 0xD) { + in++; + if (*in == 0xA) { + ctxt->input->cur = in; + in++; + ctxt->input->line++; + continue; /* while */ + } + in--; + } + if (*in == '<') { + return; + } + if (*in == '&') { + return; + } + SHRINK; + GROW; + in = ctxt->input->cur; + } while ((*in >= 0x20) && (*in <= 0x7F)); + nbchar = 0; + } + ctxt->input->line = line; + ctxt->input->col = col; + xmlParseCharDataComplex(ctxt, cdata); +} + +/** + * xmlParseCharDataComplex: + * @ctxt: an XML parser context + * @cdata: int indicating whether we are within a CDATA section + * + * parse a CharData section.this is the fallback function + * of xmlParseCharData() when the parsing requires handling + * of non-ASCII characters. + */ +void +xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { + xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; + int nbchar = 0; + int cur, l; + int count = 0; + + SHRINK; + GROW; + cur = CUR_CHAR(l); + while ((cur != '<') && /* checked */ + (cur != '&') && + (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { + if ((cur == ']') && (NXT(1) == ']') && + (NXT(2) == '>')) { + if (cdata) break; + else { + ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Sequence ']]>' not allowed in content\n"); + /* Should this be relaxed ??? I see a "must here */ + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + COPY_BUF(l,buf,nbchar,cur); + if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { + /* + * OK the segment is to be consumed as chars. 
+ */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + buf, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + nbchar = 0; + } + count++; + if (count > 50) { + GROW; + count = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + } + if (nbchar != 0) { + /* + * OK the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + } +} + +/** + * xmlParseExternalID: + * @ctxt: an XML parser context + * @publicID: a xmlChar** receiving PubidLiteral + * @strict: indicate whether we should restrict parsing to only + * production [75], see NOTE below + * + * Parse an External ID or a Public ID + * + * NOTE: Productions [75] and [83] interact badly since [75] can generate + * 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [75] ExternalID ::= 'SYSTEM' S SystemLiteral + * | 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [83] PublicID ::= 'PUBLIC' S PubidLiteral + * + * Returns the function returns SystemLiteral and in the second + * case publicID receives PubidLiteral, is strict is off + * it is possible to return NULL and have publicID set. 
+ */ + +xmlChar * +xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { + xmlChar *URI = NULL; + + SHRINK; + + *publicID = NULL; + if ((RAW == 'S') && (NXT(1) == 'Y') && + (NXT(2) == 'S') && (NXT(3) == 'T') && + (NXT(4) == 'E') && (NXT(5) == 'M')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'SYSTEM'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) { + ctxt->errNo = XML_ERR_URI_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseExternalID: SYSTEM, no URI\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } else if ((RAW == 'P') && (NXT(1) == 'U') && + (NXT(2) == 'B') && (NXT(3) == 'L') && + (NXT(4) == 'I') && (NXT(5) == 'C')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'PUBLIC'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + *publicID = xmlParsePubidLiteral(ctxt); + if (*publicID == NULL) { + ctxt->errNo = XML_ERR_PUBID_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseExternalID: PUBLIC, no Public Identifier\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (strict) { + /* + * We don't handle [83] so "S SystemLiteral" is required. 
+ */ + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the Public Identifier\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } else { + /* + * We handle [83] so we return immediately, if + * "S SystemLiteral" is not detected. From a purely parsing + * point of view that's a nice mess. + */ + const xmlChar *ptr; + GROW; + + ptr = CUR_PTR; + if (!IS_BLANK(*ptr)) return(NULL); + + while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ + if ((*ptr != '\'') && (*ptr != '"')) return(NULL); + } + SKIP_BLANKS; + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) { + ctxt->errNo = XML_ERR_URI_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseExternalID: PUBLIC, no URI\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + return(URI); +} + +/** + * xmlParseComment: + * @ctxt: an XML parser context + * + * Skip an XML (SGML) comment <!-- .... --> + * The spec says that "For compatibility, the string "--" (double-hyphen) + * must not occur within comments. " + * + * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' + */ +void +xmlParseComment(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len; + int size = XML_PARSER_BUFFER_SIZE; + int q, ql; + int r, rl; + int cur, l; + xmlParserInputState state; + xmlParserInputPtr input = ctxt->input; + int count = 0; + + /* + * Check that there is a comment right here. 
+ */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + SHRINK; + SKIP(4); + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + q = CUR_CHAR(ql); + NEXTL(ql); + r = CUR_CHAR(rl); + NEXTL(rl); + cur = CUR_CHAR(l); + len = 0; + while (IS_CHAR(cur) && /* checked */ + ((cur != '>') || + (r != '-') || (q != '-'))) { + if ((r == '-') && (q == '-')) { + ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment must not contain '--' (double-hyphen)`\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + COPY_BUF(ql,buf,len,q); + q = r; + ql = rl; + r = cur; + rl = l; + + count++; + if (count > 50) { + GROW; + count = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment not terminated \n<!--%.50s\n", buf); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + xmlFree(buf); + } else { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, +"Comment doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; + if ((ctxt->sax != NULL) && 
(ctxt->sax->comment != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->comment(ctxt->userData, buf); + xmlFree(buf); + } + ctxt->instate = state; +} + +/** + * xmlParsePITarget: + * @ctxt: an XML parser context + * + * parse the name of a PI + * + * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) + * + * Returns the PITarget name or NULL + */ + +xmlChar * +xmlParsePITarget(xmlParserCtxtPtr ctxt) { + xmlChar *name; + + name = xmlParseName(ctxt); + if ((name != NULL) && + ((name[0] == 'x') || (name[0] == 'X')) && + ((name[1] == 'm') || (name[1] == 'M')) && + ((name[2] == 'l') || (name[2] == 'L'))) { + int i; + if ((name[0] == 'x') && (name[1] == 'm') && + (name[2] == 'l') && (name[3] == 0)) { + ctxt->errNo = XML_ERR_RESERVED_XML_NAME; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML declaration allowed only at the start of the document\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(name); + } else if (name[3] == 0) { + ctxt->errNo = XML_ERR_RESERVED_XML_NAME; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(name); + } + for (i = 0;;i++) { + if (xmlW3CPIs[i] == NULL) break; + if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) + return(name); + } + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { + ctxt->errNo = XML_ERR_RESERVED_XML_NAME; + ctxt->sax->warning(ctxt->userData, + "xmlParsePITarget: invalid name prefix 'xml'\n"); + } + } + return(name); +} + +#ifdef LIBXML_CATALOG_ENABLED +/** + * xmlParseCatalogPI: + * @ctxt: an XML parser context + * @catalog: the PI value string + * + * parse an XML Catalog Processing Instruction. 
+ * + * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> + * + * Occurs only if allowed by the user and if happening in the Misc + * part of the document before any doctype informations + * This will add the given catalog to the parsing context in order + * to be used if there is a resolution need further down in the document + */ + +static void +xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { + xmlChar *URL = NULL; + const xmlChar *tmp, *base; + xmlChar marker; + + tmp = catalog; + while (IS_BLANK(*tmp)) tmp++; + if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) + goto error; + tmp += 7; + while (IS_BLANK(*tmp)) tmp++; + if (*tmp != '=') { + return; + } + tmp++; + while (IS_BLANK(*tmp)) tmp++; + marker = *tmp; + if ((marker != '\'') && (marker != '"')) + goto error; + tmp++; + base = tmp; + while ((*tmp != 0) && (*tmp != marker)) tmp++; + if (*tmp == 0) + goto error; + URL = xmlStrndup(base, tmp - base); + tmp++; + while (IS_BLANK(*tmp)) tmp++; + if (*tmp != 0) + goto error; + + if (URL != NULL) { + ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); + xmlFree(URL); + } + return; + +error: + ctxt->errNo = XML_WAR_CATALOG_PI; + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Catalog PI syntax error: %s\n", catalog); + if (URL != NULL) + xmlFree(URL); +} +#endif + +/** + * xmlParsePI: + * @ctxt: an XML parser context + * + * parse an XML Processing Instruction. + * + * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' + * + * The processing is transfered to SAX once parsed. + */ + +void +xmlParsePI(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int cur, l; + xmlChar *target; + xmlParserInputState state; + int count = 0; + + if ((RAW == '<') && (NXT(1) == '?')) { + xmlParserInputPtr input = ctxt->input; + state = ctxt->instate; + ctxt->instate = XML_PARSER_PI; + /* + * this is a Processing Instruction. 
+ */ + SKIP(2); + SHRINK; + + /* + * Parse the target name and check for special support like + * namespace. + */ + target = xmlParsePITarget(ctxt); + if (target != NULL) { + if ((RAW == '?') && (NXT(1) == '>')) { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PI declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP(2); + + /* + * SAX: PI detected. + */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->processingInstruction != NULL)) + ctxt->sax->processingInstruction(ctxt->userData, + target, NULL); + ctxt->instate = state; + xmlFree(target); + return; + } + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + cur = CUR; + if (!IS_BLANK(cur)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParsePI: PI %s space expected\n", target); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + cur = CUR_CHAR(l); + while (IS_CHAR(cur) && /* checked */ + ((cur != '?') || (NXT(1) != '>'))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + count++; + if (count > 50) { + GROW; + count = 0; + } + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (cur != '?') { + ctxt->errNo = XML_ERR_PI_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParsePI: PI 
%s never end ...\n", target); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PI declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP(2); + +#ifdef LIBXML_CATALOG_ENABLED + if (((state == XML_PARSER_MISC) || + (state == XML_PARSER_START)) && + (xmlStrEqual(target, XML_CATALOG_PI))) { + xmlCatalogAllow allow = xmlCatalogGetDefaults(); + if ((allow == XML_CATA_ALLOW_DOCUMENT) || + (allow == XML_CATA_ALLOW_ALL)) + xmlParseCatalogPI(ctxt, buf); + } +#endif + + + /* + * SAX: PI detected. + */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->processingInstruction != NULL)) + ctxt->sax->processingInstruction(ctxt->userData, + target, buf); + } + xmlFree(buf); + xmlFree(target); + } else { + ctxt->errNo = XML_ERR_PI_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParsePI : no target name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + ctxt->instate = state; + } +} + +/** + * xmlParseNotationDecl: + * @ctxt: an XML parser context + * + * parse a notation declaration + * + * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' + * + * Hence there is actually 3 choices: + * 'PUBLIC' S PubidLiteral + * 'PUBLIC' S PubidLiteral S SystemLiteral + * and 'SYSTEM' S SystemLiteral + * + * See the NOTE on xmlParseExternalID(). 
+ */ + +void +xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *Pubid; + xmlChar *Systemid; + + if ((RAW == '<') && (NXT(1) == '!') && + (NXT(2) == 'N') && (NXT(3) == 'O') && + (NXT(4) == 'T') && (NXT(5) == 'A') && + (NXT(6) == 'T') && (NXT(7) == 'I') && + (NXT(8) == 'O') && (NXT(9) == 'N')) { + xmlParserInputPtr input = ctxt->input; + SHRINK; + SKIP(10); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '<!NOTATION'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + SKIP_BLANKS; + + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "NOTATION: Name expected here\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the NOTATION name'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + SKIP_BLANKS; + + /* + * Parse the IDs. 
+ */ + Systemid = xmlParseExternalID(ctxt, &Pubid, 0); + SKIP_BLANKS; + + if (RAW == '>') { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, +"Notation declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->notationDecl != NULL)) + ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); + } else { + ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "'>' required to close NOTATION declaration\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + xmlFree(name); + if (Systemid != NULL) xmlFree(Systemid); + if (Pubid != NULL) xmlFree(Pubid); + } +} + +/** + * xmlParseEntityDecl: + * @ctxt: an XML parser context + * + * parse <!ENTITY declarations + * + * [70] EntityDecl ::= GEDecl | PEDecl + * + * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' + * + * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' + * + * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) + * + * [74] PEDef ::= EntityValue | ExternalID + * + * [76] NDataDecl ::= S 'NDATA' S Name + * + * [ VC: Notation Declared ] + * The Name must match the declared name of a notation. 
+ */ + +void +xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { + xmlChar *name = NULL; + xmlChar *value = NULL; + xmlChar *URI = NULL, *literal = NULL; + xmlChar *ndata = NULL; + int isParameter = 0; + xmlChar *orig = NULL; + int skipped; + + GROW; + if ((RAW == '<') && (NXT(1) == '!') && + (NXT(2) == 'E') && (NXT(3) == 'N') && + (NXT(4) == 'T') && (NXT(5) == 'I') && + (NXT(6) == 'T') && (NXT(7) == 'Y')) { + xmlParserInputPtr input = ctxt->input; + SHRINK; + SKIP(8); + skipped = SKIP_BLANKS; + if (skipped == 0) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '<!ENTITY'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + if (RAW == '%') { + NEXT; + skipped = SKIP_BLANKS; + if (skipped == 0) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '%'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + isParameter = 1; + } + + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + skipped = SKIP_BLANKS; + if (skipped == 0) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the entity name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + ctxt->instate = XML_PARSER_ENTITY_DECL; + /* + * handle the various case of definitions... 
+ */ + if (isParameter) { + if ((RAW == '"') || (RAW == '\'')) { + value = xmlParseEntityValue(ctxt, &orig); + if (value) { + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_INTERNAL_PARAMETER_ENTITY, + NULL, NULL, value); + } + } else { + URI = xmlParseExternalID(ctxt, &literal, 1); + if ((URI == NULL) && (literal == NULL)) { + ctxt->errNo = XML_ERR_VALUE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity value required\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (URI) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *) URI); + if (uri == NULL) { + ctxt->errNo = XML_ERR_INVALID_URI; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Invalid URI: %s\n", URI); + /* + * This really ought to be a well formedness error + * but the XML Core WG decided otherwise c.f. issue + * E26 of the XML erratas. + */ + } else { + if (uri->fragment != NULL) { + ctxt->errNo = XML_ERR_URI_FRAGMENT; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Fragment not allowed: %s\n", URI); + /* + * Okay this is foolish to block those but not + * invalid URIs. + */ + ctxt->wellFormed = 0; + } else { + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_EXTERNAL_PARAMETER_ENTITY, + literal, URI, NULL); + } + xmlFreeURI(uri); + } + } + } + } else { + if ((RAW == '"') || (RAW == '\'')) { + value = xmlParseEntityValue(ctxt, &orig); + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_INTERNAL_GENERAL_ENTITY, + NULL, NULL, value); + /* + * For expat compatibility in SAX mode. 
+ */ + if ((ctxt->myDoc == NULL) || + (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { + if (ctxt->myDoc == NULL) { + ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); + } + if (ctxt->myDoc->intSubset == NULL) + ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, + BAD_CAST "fake", NULL, NULL); + + entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, + NULL, NULL, value); + } + } else { + URI = xmlParseExternalID(ctxt, &literal, 1); + if ((URI == NULL) && (literal == NULL)) { + ctxt->errNo = XML_ERR_VALUE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity value required\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (URI) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *)URI); + if (uri == NULL) { + ctxt->errNo = XML_ERR_INVALID_URI; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Invalid URI: %s\n", URI); + /* + * This really ought to be a well formedness error + * but the XML Core WG decided otherwise c.f. issue + * E26 of the XML erratas. + */ + } else { + if (uri->fragment != NULL) { + ctxt->errNo = XML_ERR_URI_FRAGMENT; + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Fragment not allowed: %s\n", URI); + /* + * Okay this is foolish to block those but not + * invalid URIs. 
+ */ + ctxt->wellFormed = 0; + } + xmlFreeURI(uri); + } + } + if ((RAW != '>') && (!IS_BLANK(CUR))) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required before 'NDATA'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + if ((RAW == 'N') && (NXT(1) == 'D') && + (NXT(2) == 'A') && (NXT(3) == 'T') && + (NXT(4) == 'A')) { + SKIP(5); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'NDATA'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + ndata = xmlParseName(ctxt); + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->unparsedEntityDecl != NULL)) + ctxt->sax->unparsedEntityDecl(ctxt->userData, name, + literal, URI, ndata); + } else { + if ((ctxt->sax != NULL) && + (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) + ctxt->sax->entityDecl(ctxt->userData, name, + XML_EXTERNAL_GENERAL_PARSED_ENTITY, + literal, URI, NULL); + /* + * For expat compatibility in SAX mode. 
+ * assuming the entity repalcement was asked for + */ + if ((ctxt->replaceEntities != 0) && + ((ctxt->myDoc == NULL) || + (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { + if (ctxt->myDoc == NULL) { + ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); + } + + if (ctxt->myDoc->intSubset == NULL) + ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, + BAD_CAST "fake", NULL, NULL); + entityDecl(ctxt, name, + XML_EXTERNAL_GENERAL_PARSED_ENTITY, + literal, URI, NULL); + } + } + } + } + SKIP_BLANKS; + if (RAW != '>') { + ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseEntityDecl: entity %s not terminated\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, +"Entity declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; + } + if (orig != NULL) { + /* + * Ugly mechanism to save the raw entity value. 
+ */ + xmlEntityPtr cur = NULL; + + if (isParameter) { + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + cur = ctxt->sax->getParameterEntity(ctxt->userData, name); + } else { + if ((ctxt->sax != NULL) && + (ctxt->sax->getEntity != NULL)) + cur = ctxt->sax->getEntity(ctxt->userData, name); + if ((cur == NULL) && (ctxt->userData==ctxt)) { + cur = getEntity(ctxt, name); + } + } + if (cur != NULL) { + if (cur->orig != NULL) + xmlFree(orig); + else + cur->orig = orig; + } else + xmlFree(orig); + } + if (name != NULL) xmlFree(name); + if (value != NULL) xmlFree(value); + if (URI != NULL) xmlFree(URI); + if (literal != NULL) xmlFree(literal); + if (ndata != NULL) xmlFree(ndata); + } +} + +/** + * xmlParseDefaultDecl: + * @ctxt: an XML parser context + * @value: Receive a possible fixed default value for the attribute + * + * Parse an attribute default declaration + * + * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) + * + * [ VC: Required Attribute ] + * if the default declaration is the keyword #REQUIRED, then the + * attribute must be specified for all elements of the type in the + * attribute-list declaration. + * + * [ VC: Attribute Default Legal ] + * The declared default value must meet the lexical constraints of + * the declared attribute type c.f. xmlValidateAttributeDecl() + * + * [ VC: Fixed Attribute Default ] + * if an attribute has a default value declared with the #FIXED + * keyword, instances of that attribute must match the default value. + * + * [ WFC: No < in Attribute Values ] + * handled in xmlParseAttValue() + * + * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED + * or XML_ATTRIBUTE_FIXED. 
+ */ + +int +xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { + int val; + xmlChar *ret; + + *value = NULL; + if ((RAW == '#') && (NXT(1) == 'R') && + (NXT(2) == 'E') && (NXT(3) == 'Q') && + (NXT(4) == 'U') && (NXT(5) == 'I') && + (NXT(6) == 'R') && (NXT(7) == 'E') && + (NXT(8) == 'D')) { + SKIP(9); + return(XML_ATTRIBUTE_REQUIRED); + } + if ((RAW == '#') && (NXT(1) == 'I') && + (NXT(2) == 'M') && (NXT(3) == 'P') && + (NXT(4) == 'L') && (NXT(5) == 'I') && + (NXT(6) == 'E') && (NXT(7) == 'D')) { + SKIP(8); + return(XML_ATTRIBUTE_IMPLIED); + } + val = XML_ATTRIBUTE_NONE; + if ((RAW == '#') && (NXT(1) == 'F') && + (NXT(2) == 'I') && (NXT(3) == 'X') && + (NXT(4) == 'E') && (NXT(5) == 'D')) { + SKIP(6); + val = XML_ATTRIBUTE_FIXED; + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '#FIXED'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + } + ret = xmlParseAttValue(ctxt); + ctxt->instate = XML_PARSER_DTD; + if (ret == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attribute default value declaration error\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + *value = ret; + return(val); +} + +/** + * xmlParseNotationType: + * @ctxt: an XML parser context + * + * parse an Notation attribute type. + * + * Note: the leading 'NOTATION' S part has already being parsed... + * + * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + * + * [ VC: Notation Attributes ] + * Values of this type must match one of the notation names included + * in the declaration; all notation names in the declaration must be declared. 
+ * + * Returns: the notation attribute tree built while parsing + */ + +xmlEnumerationPtr +xmlParseNotationType(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlEnumerationPtr ret = NULL, last = NULL, cur; + + if (RAW != '(') { + ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "'(' required to start 'NOTATION'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + SHRINK; + do { + NEXT; + SKIP_BLANKS; + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Name expected in NOTATION declaration\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(ret); + } + cur = xmlCreateEnumeration(name); + xmlFree(name); + if (cur == NULL) return(ret); + if (last == NULL) ret = last = cur; + else { + last->next = cur; + last = cur; + } + SKIP_BLANKS; + } while (RAW == '|'); + if (RAW != ')') { + ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "')' required to finish NOTATION declaration\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + if ((last != NULL) && (last != ret)) + xmlFreeEnumeration(last); + return(ret); + } + NEXT; + return(ret); +} + +/** + * xmlParseEnumerationType: + * @ctxt: an XML parser context + * + * parse an Enumeration attribute type. + * + * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlEnumerationPtr head = NULL;   /* first node, the value returned */
    xmlEnumerationPtr tail = NULL;   /* last node appended so far */
    xmlEnumerationPtr item;
    xmlChar *token;

    if (RAW != '(') {
        ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "'(' required to start ATTLIST enumeration\n");
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        return(NULL);
    }
    SHRINK;

    for (;;) {
        /* Consume the '(' on the first pass and the '|' on later ones. */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "NmToken expected in ATTLIST enumeration\n");
            ctxt->wellFormed = 0;
            if (ctxt->recovery == 0) ctxt->disableSAX = 1;
            return(head);
        }
        item = xmlCreateEnumeration(token);
        xmlFree(token);
        if (item == NULL)
            return(head);

        if (tail == NULL)
            head = item;
        else
            tail->next = item;
        tail = item;

        SKIP_BLANKS;
        if (RAW != '|')
            break;
    }

    if (RAW == ')') {
        NEXT;
        return(head);
    }

    /* Unterminated enumeration: report and return the partial list. */
    ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        ctxt->sax->error(ctxt->userData,
                         "')' required to finish ATTLIST enumeration\n");
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * Parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    int isNotation = (RAW == 'N') && (NXT(1) == 'O') &&
                     (NXT(2) == 'T') && (NXT(3) == 'A') &&
                     (NXT(4) == 'T') && (NXT(5) == 'I') &&
                     (NXT(6) == 'O') && (NXT(7) == 'N');

    if (!isNotation) {
        /* Plain enumeration: '(' Nmtoken ('|' Nmtoken)* ')' */
        *tree = xmlParseEnumerationType(ctxt);
        return((*tree != NULL) ? XML_ATTRIBUTE_ENUMERATION : 0);
    }

    /* 'NOTATION' keyword: blanks are mandatory before the '(' group. */
    SKIP(8);
    if (!IS_BLANK(CUR)) {
        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "Space required after 'NOTATION'\n");
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        return(0);
    }
    SKIP_BLANKS;
    *tree = xmlParseNotationType(ctxt);
    return((*tree != NULL) ? XML_ATTRIBUTE_NOTATION : 0);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * Parse the attribute type of one attribute definition.
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
+ * + * [ VC: IDREF ] + * Values of type IDREF must match the Name production, and values + * of type IDREFS must match Names; each IDREF Name must match the value + * of an ID attribute on some element in the XML document; i.e. IDREF + * values must match the value of some ID attribute. + * + * [ VC: Entity Name ] + * Values of type ENTITY must match the Name production, values + * of type ENTITIES must match Names; each Entity Name must match the + * name of an unparsed entity declared in the DTD. + * + * [ VC: Name Token ] + * Values of type NMTOKEN must match the Nmtoken production; values + * of type NMTOKENS must match Nmtokens. + * + * Returns the attribute type + */ +int +xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { + SHRINK; + if ((RAW == 'C') && (NXT(1) == 'D') && + (NXT(2) == 'A') && (NXT(3) == 'T') && + (NXT(4) == 'A')) { + SKIP(5); + return(XML_ATTRIBUTE_CDATA); + } else if ((RAW == 'I') && (NXT(1) == 'D') && + (NXT(2) == 'R') && (NXT(3) == 'E') && + (NXT(4) == 'F') && (NXT(5) == 'S')) { + SKIP(6); + return(XML_ATTRIBUTE_IDREFS); + } else if ((RAW == 'I') && (NXT(1) == 'D') && + (NXT(2) == 'R') && (NXT(3) == 'E') && + (NXT(4) == 'F')) { + SKIP(5); + return(XML_ATTRIBUTE_IDREF); + } else if ((RAW == 'I') && (NXT(1) == 'D')) { + SKIP(2); + return(XML_ATTRIBUTE_ID); + } else if ((RAW == 'E') && (NXT(1) == 'N') && + (NXT(2) == 'T') && (NXT(3) == 'I') && + (NXT(4) == 'T') && (NXT(5) == 'Y')) { + SKIP(6); + return(XML_ATTRIBUTE_ENTITY); + } else if ((RAW == 'E') && (NXT(1) == 'N') && + (NXT(2) == 'T') && (NXT(3) == 'I') && + (NXT(4) == 'T') && (NXT(5) == 'I') && + (NXT(6) == 'E') && (NXT(7) == 'S')) { + SKIP(8); + return(XML_ATTRIBUTE_ENTITIES); + } else if ((RAW == 'N') && (NXT(1) == 'M') && + (NXT(2) == 'T') && (NXT(3) == 'O') && + (NXT(4) == 'K') && (NXT(5) == 'E') && + (NXT(6) == 'N') && (NXT(7) == 'S')) { + SKIP(8); + return(XML_ATTRIBUTE_NMTOKENS); + } else if ((RAW == 'N') && (NXT(1) == 'M') && + (NXT(2) == 'T') && (NXT(3) 
== 'O') && + (NXT(4) == 'K') && (NXT(5) == 'E') && + (NXT(6) == 'N')) { + SKIP(7); + return(XML_ATTRIBUTE_NMTOKEN); + } + return(xmlParseEnumeratedType(ctxt, tree)); +} + +/** + * xmlParseAttributeListDecl: + * @ctxt: an XML parser context + * + * : parse the Attribute list def for an element + * + * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' + * + * [53] AttDef ::= S Name S AttType S DefaultDecl + * + */ +void +xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { + xmlChar *elemName; + xmlChar *attrName; + xmlEnumerationPtr tree; + + if ((RAW == '<') && (NXT(1) == '!') && + (NXT(2) == 'A') && (NXT(3) == 'T') && + (NXT(4) == 'T') && (NXT(5) == 'L') && + (NXT(6) == 'I') && (NXT(7) == 'S') && + (NXT(8) == 'T')) { + xmlParserInputPtr input = ctxt->input; + + SKIP(9); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after '<!ATTLIST'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + elemName = xmlParseName(ctxt); + if (elemName == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "ATTLIST: no name for Element\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + SKIP_BLANKS; + GROW; + while (RAW != '>') { + const xmlChar *check = CUR_PTR; + int type; + int def; + xmlChar *defaultValue = NULL; + + GROW; + tree = NULL; + attrName = xmlParseName(ctxt); + if (attrName == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "ATTLIST: no name for Attribute\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + } + GROW; + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + 
ctxt->sax->error(ctxt->userData, + "Space required after the attribute name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + break; + } + SKIP_BLANKS; + + type = xmlParseAttributeType(ctxt, &tree); + if (type <= 0) { + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + break; + } + + GROW; + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the attribute type\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + if (tree != NULL) + xmlFreeEnumeration(tree); + break; + } + SKIP_BLANKS; + + def = xmlParseDefaultDecl(ctxt, &defaultValue); + if (def <= 0) { + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + if (tree != NULL) + xmlFreeEnumeration(tree); + break; + } + + GROW; + if (RAW != '>') { + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the attribute default value\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + if (tree != NULL) + xmlFreeEnumeration(tree); + break; + } + SKIP_BLANKS; + } + if (check == CUR_PTR) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseAttributeListDecl: detected internal error\n"); + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + if (tree != NULL) + 
xmlFreeEnumeration(tree); + break; + } + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->attributeDecl != NULL)) + ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, + type, def, defaultValue, tree); + if (attrName != NULL) + xmlFree(attrName); + if (defaultValue != NULL) + xmlFree(defaultValue); + GROW; + } + if (RAW == '>') { + if (input != ctxt->input) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, +"Attribute list declaration doesn't start and stop in the same entity\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; + } + + xmlFree(elemName); + } +} + +/** + * xmlParseElementMixedContentDecl: + * @ctxt: an XML parser context + * @inputchk: the input used for the current entity, needed for boundary checks + * + * parse the declaration for a Mixed Element content + * The leading '(' and spaces have been skipped in xmlParseElementContentDecl + * + * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | + * '(' S? '#PCDATA' S? ')' + * + * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) + * + * [ VC: No Duplicate Types ] + * The same name must not appear more than once in a single + * mixed-content declaration. 
+ * + * returns: the list of the xmlElementContentPtr describing the element choices + */ +xmlElementContentPtr +xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { + xmlElementContentPtr ret = NULL, cur = NULL, n; + xmlChar *elem = NULL; + + GROW; + if ((RAW == '#') && (NXT(1) == 'P') && + (NXT(2) == 'C') && (NXT(3) == 'D') && + (NXT(4) == 'A') && (NXT(5) == 'T') && + (NXT(6) == 'A')) { + SKIP(7); + SKIP_BLANKS; + SHRINK; + if (RAW == ')') { + if ((ctxt->validate) && (ctxt->input != inputchk)) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if (ctxt->vctxt.error != NULL) + ctxt->vctxt.error(ctxt->vctxt.userData, +"Element content declaration doesn't start and stop in the same entity\n"); + ctxt->valid = 0; + } + NEXT; + ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); + if (RAW == '*') { + ret->ocur = XML_ELEMENT_CONTENT_MULT; + NEXT; + } + return(ret); + } + if ((RAW == '(') || (RAW == '|')) { + ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); + if (ret == NULL) return(NULL); + } + while (RAW == '|') { + NEXT; + if (elem == NULL) { + ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); + if (ret == NULL) return(NULL); + ret->c1 = cur; + if (cur != NULL) + cur->parent = ret; + cur = ret; + } else { + n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); + if (n == NULL) return(NULL); + n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); + if (n->c1 != NULL) + n->c1->parent = n; + cur->c2 = n; + if (n != NULL) + n->parent = cur; + cur = n; + xmlFree(elem); + } + SKIP_BLANKS; + elem = xmlParseName(ctxt); + if (elem == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseElementMixedContentDecl : Name expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + xmlFreeElementContent(cur); + return(NULL); + } + SKIP_BLANKS; + GROW; + } + if ((RAW == ')') && (NXT(1) 
== '*')) { + if (elem != NULL) { + cur->c2 = xmlNewElementContent(elem, + XML_ELEMENT_CONTENT_ELEMENT); + if (cur->c2 != NULL) + cur->c2->parent = cur; + xmlFree(elem); + } + ret->ocur = XML_ELEMENT_CONTENT_MULT; + if ((ctxt->validate) && (ctxt->input != inputchk)) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if (ctxt->vctxt.error != NULL) + ctxt->vctxt.error(ctxt->vctxt.userData, +"Element content declaration doesn't start and stop in the same entity\n"); + ctxt->valid = 0; + } + SKIP(2); + } else { + if (elem != NULL) xmlFree(elem); + xmlFreeElementContent(ret); + ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + + } else { + ctxt->errNo = XML_ERR_PCDATA_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + return(ret); +} + +/** + * xmlParseElementChildrenContentDecl: + * @ctxt: an XML parser context + * @inputchk: the input used for the current entity, needed for boundary checks + * + * parse the declaration for a Mixed Element content + * The leading '(' and spaces have been skipped in xmlParseElementContentDecl + * + * + * [47] children ::= (choice | seq) ('?' | '*' | '+')? + * + * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? + * + * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' + * + * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' + * + * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] + * TODO Parameter-entity replacement text must be properly nested + * with parenthesized groups. 
That is to say, if either of the
 *      opening or closing parentheses in a choice, seq, or Mixed
 *      construct is contained in the replacement text for a parameter
 *      entity, both must be contained in the same replacement text. For
 *      interoperability, if a parameter-entity reference appears in a
 *      choice, seq, or Mixed construct, its replacement text should not
 *      be empty, and neither the first nor last non-blank character of
 *      the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
xmlElementContentPtr
xmlParseElementChildrenContentDecl
(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    xmlChar *elem;
    xmlChar type = 0;   /* separator of this group: 0, ',' or '|' */

    SKIP_BLANKS;
    GROW;
    if (RAW == '(') {
        xmlParserInputPtr input = ctxt->input;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
        SKIP_BLANKS;
        GROW;
    } else {
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
            ctxt->wellFormed = 0;
            if (ctxt->recovery == 0) ctxt->disableSAX = 1;
            return(NULL);
        }
        cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
        if (ret == NULL) {
            /* Allocation failed: do not dereference it below. */
            xmlFree(elem);
            return(NULL);
        }
        GROW;
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        xmlFree(elem);
        GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while (RAW != ')') {
        /*
         * Each loop we parse one separator and one element.
         * "last" is the element parsed by the previous pass that has
         * not been linked into the tree yet.
         */
        if ((RAW == ',') || (RAW == '|')) {
            int isSeq = (RAW == ',');

            if (type == 0) {
                type = CUR;
            } else if (type != CUR) {
                /*
                 * Detect "Name | Name , Name" and "Name , Name | Name"
                 * errors: separators cannot be mixed within one group.
                 */
                ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                    type);
                ctxt->wellFormed = 0;
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
                if ((last != NULL) && (last != ret))
                    xmlFreeElementContent(last);
                if (ret != NULL)
                    xmlFreeElementContent(ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewElementContent(NULL, isSeq ? XML_ELEMENT_CONTENT_SEQ
                                                  : XML_ELEMENT_CONTENT_OR);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeElementContent(last);
                if (ret != NULL)
                    xmlFreeElementContent(ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the operator becomes the new root. */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /* Chain the operator to the right of the previous one. */
                cur->c2 = op;
                op->parent = cur;
                op->c1 = last;
                last->parent = op;
                cur = op;
                last = NULL;
            }
        } else {
            ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
            "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
            ctxt->wellFormed = 0;
            if (ctxt->recovery == 0) ctxt->disableSAX = 1;
            /* The pending element is not part of ret yet: free it too. */
            if ((last != NULL) && (last != ret))
                xmlFreeElementContent(last);
            if (ret != NULL)
                xmlFreeElementContent(ret);
            return(NULL);
        }
        GROW;
        SKIP_BLANKS;
        GROW;
        if (RAW == '(') {
            xmlParserInputPtr input = ctxt->input;
            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            last = xmlParseElementChildrenContentDecl(ctxt, input);
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
                ctxt->wellFormed = 0;
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
                if (ret != NULL)
                    xmlFreeElementContent(ret);
                return(NULL);
            }
            last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
            xmlFree(elem);
            if (last == NULL) {
                /* Allocation failed: drop the tree, nothing to attach. */
                if (ret != NULL)
                    xmlFreeElementContent(ret);
                return(NULL);
            }
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }
    /* Attach the last parsed element to the last operator. */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        last->parent = cur;
    }
    if ((ctxt->validate) && (ctxt->input != inputchk)) {
        ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
        if (ctxt->vctxt.error != NULL)
            ctxt->vctxt.error(ctxt->vctxt.userData,
"Element content declaration doesn't start and stop in the same entity\n");
        ctxt->valid = 0;
    }
    NEXT;
    if (RAW == '?') {
        if (ret != NULL)
            ret->ocur = XML_ELEMENT_CONTENT_OPT;
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             *
             * cur->c2 is NULL when a nested group failed to parse, so
             * the walk has to stop on NULL as well.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             *
             * NOTE(review): unlike the '*' case, the walk starts from
             * the current value of cur (the last operator created), not
             * from ret; this is kept as in the original code -- confirm
             * whether it is intended.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);
}

/**
 * xmlParseElementContentDecl:
 * @ctxt:  an XML parser context
 * @name:  the name of the element being defined.
+ * @result: the Element Content pointer will be stored here if any + * + * parse the declaration for an Element content either Mixed or Children, + * the cases EMPTY and ANY are handled directly in xmlParseElementDecl + * + * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children + * + * returns: the type of element content XML_ELEMENT_TYPE_xxx + */ + +int +xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, + xmlElementContentPtr *result) { + + xmlElementContentPtr tree = NULL; + xmlParserInputPtr input = ctxt->input; + int res; + + *result = NULL; + + if (RAW != '(') { + ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseElementContentDecl : %s '(' expected\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(-1); + } + NEXT; + GROW; + SKIP_BLANKS; + if ((RAW == '#') && (NXT(1) == 'P') && + (NXT(2) == 'C') && (NXT(3) == 'D') && + (NXT(4) == 'A') && (NXT(5) == 'T') && + (NXT(6) == 'A')) { + tree = xmlParseElementMixedContentDecl(ctxt, input); + res = XML_ELEMENT_TYPE_MIXED; + } else { + tree = xmlParseElementChildrenContentDecl(ctxt, input); + res = XML_ELEMENT_TYPE_ELEMENT; + } + SKIP_BLANKS; + *result = tree; + return(res); +} + +/** + * xmlParseElementDecl: + * @ctxt: an XML parser context + * + * parse an Element declaration. + * + * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    xmlElementContentPtr content = NULL;
    xmlParserInputPtr input;
    xmlChar *name;
    int ret = -1;

    GROW;
    /* Not an '<!ELEMENT' declaration: nothing is consumed. */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != 'E') || (NXT(3) != 'L') ||
        (NXT(4) != 'E') || (NXT(5) != 'M') ||
        (NXT(6) != 'E') || (NXT(7) != 'N') ||
        (NXT(8) != 'T'))
        return(-1);

    /* Remembered to check that the declaration ends in the same entity. */
    input = ctxt->input;

    SKIP(9);
    if (!IS_BLANK(CUR)) {
        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "Space required after 'ELEMENT'\n");
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    }
    SKIP_BLANKS;

    name = xmlParseName(ctxt);
    if (name == NULL) {
        ctxt->errNo = XML_ERR_NAME_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "xmlParseElementDecl: no name for Element\n");
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        return(-1);
    }

    /* Leave any input that is exhausted before looking for the blank. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK(CUR)) {
        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "Space required after the element name\n");
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    }
    SKIP_BLANKS;

    /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
    if ((RAW == 'E') && (NXT(1) == 'M') &&
        (NXT(2) == 'P') && (NXT(3) == 'T') &&
        (NXT(4) == 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') &&
               (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
          "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        if (name != NULL) xmlFree(name);
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW == '>') {
        if (input != ctxt->input) {
            ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
"Element declaration doesn't start and stop in the same entity\n");
            ctxt->wellFormed = 0;
            if (ctxt->recovery == 0) ctxt->disableSAX = 1;
        }

        NEXT;
        /* The declaration is only reported once it is fully terminated. */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
            (ctxt->sax->elementDecl != NULL))
            ctxt->sax->elementDecl(ctxt->userData, name, ret,
                                   content);
    } else {
        ctxt->errNo = XML_ERR_GT_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                  "xmlParseElementDecl: expected '>' at the end\n");
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    }

    /* The content tree and the name are released here in both cases. */
    if (content != NULL) {
        xmlFreeElementContent(content);
    }
    if (name != NULL) {
        xmlFree(name);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::=
'<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' + * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' + * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* + * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) + */ + +static void +xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { + SKIP(3); + SKIP_BLANKS; + if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && + (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && + (NXT(6) == 'E')) { + SKIP(7); + SKIP_BLANKS; + if (RAW != '[') { + ctxt->errNo = XML_ERR_CONDSEC_INVALID; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML conditional section '[' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + NEXT; + } + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Entering INCLUDE Conditional Section\n"); + } + + while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || + (NXT(2) != '>'))) { + const xmlChar *check = CUR_PTR; + int cons = ctxt->input->consumed; + + if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { + xmlParseConditionalSections(ctxt); + } else if (IS_BLANK(CUR)) { + NEXT; + } else if (RAW == '%') { + xmlParsePEReference(ctxt); + } else + xmlParseMarkupDecl(ctxt); + + /* + * Pop-up of finished entities. 
+ */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + + if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { + ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Content error in the external subset\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + } + } + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Leaving INCLUDE Conditional Section\n"); + } + + } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && + (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { + int state; + int instate; + int depth = 0; + + SKIP(6); + SKIP_BLANKS; + if (RAW != '[') { + ctxt->errNo = XML_ERR_CONDSEC_INVALID; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML conditional section '[' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + NEXT; + } + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Entering IGNORE Conditional Section\n"); + } + + /* + * Parse up to the end of the conditional section + * But disable SAX event generating DTD building in the meantime + */ + state = ctxt->disableSAX; + instate = ctxt->instate; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_IGNORE; + + while ((depth >= 0) && (RAW != 0)) { + if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { + depth++; + SKIP(3); + continue; + } + if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { + if (--depth >= 0) SKIP(3); + continue; + } + NEXT; + continue; + 
} + + ctxt->disableSAX = state; + ctxt->instate = instate; + + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Leaving IGNORE Conditional Section\n"); + } + + } else { + ctxt->errNo = XML_ERR_CONDSEC_INVALID; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML conditional section INCLUDE or IGNORE keyword expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + if (RAW == 0) + SHRINK; + + if (RAW == 0) { + ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML conditional section not closed\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + SKIP(3); + } +} + +/** + * xmlParseMarkupDecl: + * @ctxt: an XML parser context + * + * parse Markup declarations + * + * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | + * NotationDecl | PI | Comment + * + * [ VC: Proper Declaration/PE Nesting ] + * Parameter-entity replacement text must be properly nested with + * markup declarations. That is to say, if either the first character + * or the last character of a markup declaration (markupdecl above) is + * contained in the replacement text for a parameter-entity reference, + * both must be contained in the same replacement text. + * + * [ WFC: PEs in Internal Subset ] + * In the internal DTD subset, parameter-entity references can occur + * only where markup declarations can occur, not within markup declarations. + * (This does not apply to references that occur in external parameter + * entities or to the external subset.) 
+ */ +void +xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { + GROW; + xmlParseElementDecl(ctxt); + xmlParseAttributeListDecl(ctxt); + xmlParseEntityDecl(ctxt); + xmlParseNotationDecl(ctxt); + xmlParsePI(ctxt); + xmlParseComment(ctxt); + /* + * This is only for internal subset. On external entities, + * the replacement is done before parsing stage + */ + if ((ctxt->external == 0) && (ctxt->inputNr == 1)) + xmlParsePEReference(ctxt); + + /* + * Conditional sections are allowed from entities included + * by PE References in the internal subset. + */ + if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { + if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { + xmlParseConditionalSections(ctxt); + } + } + + ctxt->instate = XML_PARSER_DTD; +} + +/** + * xmlParseTextDecl: + * @ctxt: an XML parser context + * + * parse an XML declaration header for external entities + * + * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' + * + * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? + */ + +void +xmlParseTextDecl(xmlParserCtxtPtr ctxt) { + xmlChar *version; + + /* + * We know that '<?xml' is here. + */ + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + SKIP(5); + } else { + ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Text declaration '<?xml' required\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + + return; + } + + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space needed after '<?xml'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + + /* + * We may have the VersionInfo here. 
+ */ + version = xmlParseVersionInfo(ctxt); + if (version == NULL) + version = xmlCharStrdup(XML_DEFAULT_VERSION); + else { + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Space needed here\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + ctxt->input->version = version; + + /* + * We must have the encoding declaration + */ + xmlParseEncodingDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + return; + } + + SKIP_BLANKS; + if ((RAW == '?') && (NXT(1) == '>')) { + SKIP(2); + } else if (RAW == '>') { + /* Deprecated old WD ... */ + ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML declaration must end-up with '?>'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + NEXT; + } else { + ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "parsing XML declaration: '?>' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + MOVETO_ENDTAG(CUR_PTR); + NEXT; + } +} + +/** + * xmlParseExternalSubset: + * @ctxt: an XML parser context + * @ExternalID: the external identifier + * @SystemID: the system identifier (or URL) + * + * parse Markup declarations from an external subset + * + * [30] extSubset ::= textDecl? 
extSubsetDecl + * + * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * + */ +void +xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, + const xmlChar *SystemID) { + GROW; + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l')) { + xmlParseTextDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + ctxt->instate = XML_PARSER_EOF; + return; + } + } + if (ctxt->myDoc == NULL) { + ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + } + if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) + xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); + + ctxt->instate = XML_PARSER_DTD; + ctxt->external = 1; + while (((RAW == '<') && (NXT(1) == '?')) || + ((RAW == '<') && (NXT(1) == '!')) || + (RAW == '%') || IS_BLANK(CUR)) { + const xmlChar *check = CUR_PTR; + int cons = ctxt->input->consumed; + + GROW; + if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { + xmlParseConditionalSections(ctxt); + } else if (IS_BLANK(CUR)) { + NEXT; + } else if (RAW == '%') { + xmlParsePEReference(ctxt); + } else + xmlParseMarkupDecl(ctxt); + + /* + * Pop-up of finished entities. 
+ */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + + if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { + ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Content error in the external subset\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + } + } + + if (RAW != 0) { + ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + +} + +/** + * xmlParseReference: + * @ctxt: an XML parser context + * + * parse and handle entity references in content, depending on the SAX + * interface, this may end-up in a call to character() if this is a + * CharRef, a predefined entity, if there is no reference() callback. + * or if the parser was asked to switch to that mode. + * + * [67] Reference ::= EntityRef | CharRef + */ +void +xmlParseReference(xmlParserCtxtPtr ctxt) { + xmlEntityPtr ent; + xmlChar *val; + if (RAW != '&') return; + + if (NXT(1) == '#') { + int i = 0; + xmlChar out[10]; + int hex = NXT(2); + int value = xmlParseCharRef(ctxt); + + if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { + /* + * So we are using non-UTF-8 buffers + * Check that the char fit on 8bits, if not + * generate a CharRef. 
+ */ + if (value <= 0xFF) { + out[0] = value; + out[1] = 0; + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->characters(ctxt->userData, out, 1); + } else { + if ((hex == 'x') || (hex == 'X')) + snprintf((char *)out, sizeof(out), "#x%X", value); + else + snprintf((char *)out, sizeof(out), "#%d", value); + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->reference(ctxt->userData, out); + } + } else { + /* + * Just encode the value in UTF-8 + */ + COPY_BUF(0 ,out, i, value); + out[i] = 0; + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->characters(ctxt->userData, out, i); + } + } else { + ent = xmlParseEntityRef(ctxt); + if (ent == NULL) return; + if (!ctxt->wellFormed) + return; + if ((ent->name != NULL) && + (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { + xmlNodePtr list = NULL; + int ret; + + + /* + * The first reference to the entity trigger a parsing phase + * where the ent->children is filled with the result from + * the parsing. + */ + if (ent->children == NULL) { + xmlChar *value; + value = ent->content; + + /* + * Check that this entity is well formed + */ + if ((value != NULL) && + (value[1] == 0) && (value[0] == '<') && + (xmlStrEqual(ent->name, BAD_CAST "lt"))) { + /* + * DONE: get definite answer on this !!! + * Lots of entity decls are used to declare a single + * char + * <!ENTITY lt "<"> + * Which seems to be valid since + * 2.4: The ampersand character (&) and the left angle + * bracket (<) may appear in their literal form only + * when used ... They are also legal within the literal + * entity value of an internal entity declaration;i + * see "4.3.2 Well-Formed Parsed Entities". + * IMHO 2.4 and 4.3.2 are directly in contradiction. + * Looking at the OASIS test suite and James Clark + * tests, this is broken. However the XML REC uses + * it. Is the XML REC not well-formed ???? 
+ * This is a hack to avoid this problem + * + * ANSWER: since lt gt amp .. are already defined, + * this is a redefinition and hence the fact that the + * content is not well balanced is not a Wf error, this + * is lousy but acceptable. + */ + list = xmlNewDocText(ctxt->myDoc, value); + if (list != NULL) { + if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && + (ent->children == NULL)) { + ent->children = list; + ent->last = list; + list->parent = (xmlNodePtr) ent; + } else { + xmlFreeNodeList(list); + } + } else if (list != NULL) { + xmlFreeNodeList(list); + } + } else { + /* + * 4.3.2: An internal general parsed entity is well-formed + * if its replacement text matches the production labeled + * content. + */ + + void *user_data; + /* + * This is a bit hackish but this seems the best + * way to make sure both SAX and DOM entity support + * behaves okay. + */ + if (ctxt->userData == ctxt) + user_data = NULL; + else + user_data = ctxt->userData; + + if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { + ctxt->depth++; + ret = xmlParseBalancedChunkMemoryInternal(ctxt, + value, user_data, &list); + ctxt->depth--; + } else if (ent->etype == + XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, + ctxt->sax, user_data, ctxt->depth, + ent->URI, ent->ExternalID, &list); + ctxt->depth--; + } else { + ret = -1; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Internal: invalid entity type\n"); + } + if (ret == XML_ERR_ENTITY_LOOP) { + ctxt->errNo = XML_ERR_ENTITY_LOOP; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Detected entity reference loop\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } else if ((ret == 0) && (list != NULL)) { + if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& + (ent->children == NULL)) { + 
ent->children = list; + if (ctxt->replaceEntities) { + /* + * Prune it directly in the generated document + * except for single text nodes. + */ + if ((list->type == XML_TEXT_NODE) && + (list->next == NULL)) { + list->parent = (xmlNodePtr) ent; + list = NULL; + } else { + while (list != NULL) { + list->parent = (xmlNodePtr) ctxt->node; + list->doc = ctxt->myDoc; + if (list->next == NULL) + ent->last = list; + list = list->next; + } + list = ent->children; + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, list, NULL); + } + } else { + while (list != NULL) { + list->parent = (xmlNodePtr) ent; + if (list->next == NULL) + ent->last = list; + list = list->next; + } + } + } else { + xmlFreeNodeList(list); + list = NULL; + } + } else if (ret > 0) { + ctxt->errNo = ret; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity value required\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (list != NULL) { + xmlFreeNodeList(list); + list = NULL; + } + } + } + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Create a node. 
+ */ + ctxt->sax->reference(ctxt->userData, ent->name); + return; + } else if (ctxt->replaceEntities) { + if ((ctxt->node != NULL) && (ent->children != NULL)) { + /* + * Seems we are generating the DOM content, do + * a simple tree copy for all references except the first + * In the first occurrence list contains the replacement + */ + if (list == NULL) { + xmlNodePtr new = NULL, cur, firstChild = NULL; + cur = ent->children; + while (cur != NULL) { + new = xmlCopyNode(cur, 1); + if (firstChild == NULL){ + firstChild = new; + } + xmlAddChild(ctxt->node, new); + if (cur == ent->last) + break; + cur = cur->next; + } + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, firstChild, new); + } else { + /* + * the name change is to avoid coalescing of the + * node with a possible previous text one which + * would make ent->children a dangling pointer + */ + if (ent->children->type == XML_TEXT_NODE) + ent->children->name = xmlStrdup(BAD_CAST "nbktext"); + if ((ent->last != ent->children) && + (ent->last->type == XML_TEXT_NODE)) + ent->last->name = xmlStrdup(BAD_CAST "nbktext"); + xmlAddChildList(ctxt->node, ent->children); + } + + /* + * This is to avoid a nasty side effect, see + * characters() in SAX.c + */ + ctxt->nodemem = 0; + ctxt->nodelen = 0; + return; + } else { + /* + * Probably running in SAX mode + */ + xmlParserInputPtr input; + + input = xmlNewEntityInputStream(ctxt, ent); + xmlPushInput(ctxt, input); + if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && + (RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + ctxt->instate = XML_PARSER_EOF; + return; + } + if (input->standalone == 1) { + ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + 
ctxt->sax->error(ctxt->userData, + "external parsed entities cannot be standalone\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + return; + } + } + } else { + val = ent->content; + if (val == NULL) return; + /* + * inline the entity. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); + } + } +} + +/** + * xmlParseEntityRef: + * @ctxt: an XML parser context + * + * parse ENTITY references declarations + * + * [68] EntityRef ::= '&' Name ';' + * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", the Name given in the entity reference + * must match that in an entity declaration, except that well-formed + * documents need not declare any of the following entities: amp, lt, + * gt, apos, quot. The declaration of a parameter entity must precede + * any reference to it. Similarly, the declaration of a general entity + * must precede any reference to it which appears in a default value in an + * attribute-list declaration. Note that if entities are declared in the + * external subset or in external parameter entities, a non-validating + * processor is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be declared is a + * well-formedness constraint only if standalone='yes'. + * + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an unparsed entity + * + * Returns the xmlEntityPtr if found, or NULL otherwise. 
+ */ +xmlEntityPtr +xmlParseEntityRef(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlEntityPtr ent = NULL; + + GROW; + + if (RAW == '&') { + NEXT; + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseEntityRef: no name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (RAW == ';') { + NEXT; + /* + * Ask first SAX for entity resolution, otherwise try the + * predefined set. + */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if (ent == NULL) + ent = xmlGetPredefinedEntity(name); + if ((ent == NULL) && (ctxt->userData==ctxt)) { + ent = getEntity(ctxt, name); + } + } + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", the + * Name given in the entity reference must match that in an + * entity declaration, except that well-formed documents + * need not declare any of the following entities: amp, lt, + * gt, apos, quot. + * The declaration of a parameter entity must precede any + * reference to it. + * Similarly, the declaration of a general entity must + * precede any reference to it which appears in a default + * value in an attribute-list declaration. Note that if + * entities are declared in the external subset or in + * external parameter entities, a non-validating processor + * is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be + * declared is a well-formedness constraint only if + * standalone='yes'. 
+ */ + if (ent == NULL) { + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity '%s' not defined\n", name); + ctxt->wellFormed = 0; + ctxt->valid = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity '%s' not defined\n", name); + ctxt->valid = 0; + } + } + + /* + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an + * unparsed entity + */ + else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + ctxt->errNo = XML_ERR_UNPARSED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity reference to unparsed entity %s\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect + * entity references to external entities. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attribute references external entity '%s'\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + /* + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or + * indirectly in an attribute value (other than "<") must + * not contain a <. 
+ */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent != NULL) && + (!xmlStrEqual(ent->name, BAD_CAST "lt")) && + (ent->content != NULL) && + (xmlStrchr(ent->content, '<'))) { + ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "'<' in entity '%s' is not allowed in attributes values\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * Internal check, no parameter entities here ... + */ + else { + switch (ent->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attempt to reference the parameter entity '%s'\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + default: + break; + } + } + + /* + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive reference + * to itself, either directly or indirectly. + * Done somewhere else + */ + + } else { + ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseEntityRef: expecting ';'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + xmlFree(name); + } + } + return(ent); +} + +/** + * xmlParseStringEntityRef: + * @ctxt: an XML parser context + * @str: a pointer to an index in the string + * + * parse ENTITY references declarations, but this version parses it from + * a string value. 
+ * + * [68] EntityRef ::= '&' Name ';' + * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", the Name given in the entity reference + * must match that in an entity declaration, except that well-formed + * documents need not declare any of the following entities: amp, lt, + * gt, apos, quot. The declaration of a parameter entity must precede + * any reference to it. Similarly, the declaration of a general entity + * must precede any reference to it which appears in a default value in an + * attribute-list declaration. Note that if entities are declared in the + * external subset or in external parameter entities, a non-validating + * processor is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be declared is a + * well-formedness constraint only if standalone='yes'. + * + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an unparsed entity + * + * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer + * is updated to the current location in the string. + */ +xmlEntityPtr +xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { + xmlChar *name; + const xmlChar *ptr; + xmlChar cur; + xmlEntityPtr ent = NULL; + + if ((str == NULL) || (*str == NULL)) + return(NULL); + ptr = *str; + cur = *ptr; + if (cur == '&') { + ptr++; + cur = *ptr; + name = xmlParseStringName(ctxt, &ptr); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringEntityRef: no name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (*ptr == ';') { + ptr++; + /* + * Ask first SAX for entity resolution, otherwise try the + * predefined set. 
+ */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if (ent == NULL) + ent = xmlGetPredefinedEntity(name); + if ((ent == NULL) && (ctxt->userData==ctxt)) { + ent = getEntity(ctxt, name); + } + } + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", the + * Name given in the entity reference must match that in an + * entity declaration, except that well-formed documents + * need not declare any of the following entities: amp, lt, + * gt, apos, quot. + * The declaration of a parameter entity must precede any + * reference to it. + * Similarly, the declaration of a general entity must + * precede any reference to it which appears in a default + * value in an attribute-list declaration. Note that if + * entities are declared in the external subset or in + * external parameter entities, a non-validating processor + * is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be + * declared is a well-formedness constraint only if + * standalone='yes'. 
+ */ + if (ent == NULL) { + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity '%s' not defined\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Entity '%s' not defined\n", name); + } + } + + /* + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an + * unparsed entity + */ + else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + ctxt->errNo = XML_ERR_UNPARSED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Entity reference to unparsed entity %s\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect + * entity references to external entities. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attribute references external entity '%s'\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + /* + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or + * indirectly in an attribute value (other than "<") must + * not contain a <. 
+ */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent != NULL) && + (!xmlStrEqual(ent->name, BAD_CAST "lt")) && + (ent->content != NULL) && + (xmlStrchr(ent->content, '<'))) { + ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "'<' in entity '%s' is not allowed in attributes values\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * Internal check, no parameter entities here ... + */ + else { + switch (ent->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attempt to reference the parameter entity '%s'\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + default: + break; + } + } + + /* + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive reference + * to itself, either directly or indirectly. + * Done somewhere else + */ + + } else { + ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringEntityRef: expecting ';'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + xmlFree(name); + } + } + *str = ptr; + return(ent); +} + +/** + * xmlParsePEReference: + * @ctxt: an XML parser context + * + * parse PEReference declarations + * The entity content is handled directly by pushing it's content as + * a new input stream. + * + * [69] PEReference ::= '%' Name ';' + * + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive + * reference to itself, either directly or indirectly. 
+ * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", ... ... The declaration of a parameter + * entity must precede any reference to it... + * + * [ VC: Entity Declared ] + * In a document with an external subset or external parameter entities + * with "standalone='no'", ... ... The declaration of a parameter entity + * must precede any reference to it... + * + * [ WFC: In DTD ] + * Parameter-entity references may only appear in the DTD. + * NOTE: misleading but this is handled. + */ +void +xmlParsePEReference(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlEntityPtr entity = NULL; + xmlParserInputPtr input; + + if (RAW == '%') { + NEXT; + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParsePEReference: no name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (RAW == ';') { + NEXT; + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, + name); + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... 
+ */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; + if ((!ctxt->disableSAX) && + (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PEReference: %%%s; not found\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((!ctxt->disableSAX) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "PEReference: %%%s; not found\n", name); + ctxt->valid = 0; + } + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Internal: %%%s; is not a parameter entity\n", name); + } else if (ctxt->input->free != deallocblankswrapper) { + input = xmlNewBlanksWrapperInputStream(ctxt, entity); + xmlPushInput(ctxt, input); + } else { + /* + * TODO !!! + * handle the extra spaces added before and after + * c.f. 
http://www.w3.org/TR/REC-xml#as-PE + */ + input = xmlNewEntityInputStream(ctxt, entity); + xmlPushInput(ctxt, input); + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + (RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing + * right here + */ + ctxt->instate = XML_PARSER_EOF; + xmlFree(name); + return; + } + } + } + } + ctxt->hasPErefs = 1; + } else { + ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParsePEReference: expecting ';'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + xmlFree(name); + } + } +} + +/** + * xmlParseStringPEReference: + * @ctxt: an XML parser context + * @str: a pointer to an index in the string + * + * parse PEReference declarations + * + * [69] PEReference ::= '%' Name ';' + * + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive + * reference to itself, either directly or indirectly. + * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", ... ... The declaration of a parameter + * entity must precede any reference to it... + * + * [ VC: Entity Declared ] + * In a document with an external subset or external parameter entities + * with "standalone='no'", ... ... The declaration of a parameter entity + * must precede any reference to it... + * + * [ WFC: In DTD ] + * Parameter-entity references may only appear in the DTD. + * NOTE: misleading but this is handled. + * + * Returns the string of the entity content. 
+ * str is updated to the current value of the index + */ +xmlEntityPtr +xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { + const xmlChar *ptr; + xmlChar cur; + xmlChar *name; + xmlEntityPtr entity = NULL; + + if ((str == NULL) || (*str == NULL)) return(NULL); + ptr = *str; + cur = *ptr; + if (cur == '%') { + ptr++; + cur = *ptr; + name = xmlParseStringName(ctxt, &ptr); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringPEReference: no name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + cur = *ptr; + if (cur == ';') { + ptr++; + cur = *ptr; + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, + name); + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PEReference: %%%s; not found\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must + * precede any reference to it... 
+ */ + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "PEReference: %%%s; not found\n", name); + ctxt->valid = 0; + } + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Internal: %%%s; is not a parameter entity\n", name); + } + } + ctxt->hasPErefs = 1; + } else { + ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringPEReference: expecting ';'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + xmlFree(name); + } + } + *str = ptr; + return(entity); +} + +/** + * xmlParseDocTypeDecl: + * @ctxt: an XML parser context + * + * parse a DOCTYPE declaration + * + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? + * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + * + * [ VC: Root Element Type ] + * The Name in the document type declaration must match the element + * type of the root element. + */ + +void +xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { + xmlChar *name = NULL; + xmlChar *ExternalID = NULL; + xmlChar *URI = NULL; + + /* + * We know that '<!DOCTYPE' has been detected. + */ + SKIP(9); + + SKIP_BLANKS; + + /* + * Parse the DOCTYPE name. 
+ */ + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseDocTypeDecl : no DOCTYPE name !\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + ctxt->intSubName = name; + + SKIP_BLANKS; + + /* + * Check for SystemID and ExternalID + */ + URI = xmlParseExternalID(ctxt, &ExternalID, 1); + + if ((URI != NULL) || (ExternalID != NULL)) { + ctxt->hasExternalSubset = 1; + } + ctxt->extSubURI = URI; + ctxt->extSubSystem = ExternalID; + + SKIP_BLANKS; + + /* + * Create and update the internal subset. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); + + /* + * Is there any internal subset declarations ? + * they are handled separately in xmlParseInternalSubset() + */ + if (RAW == '[') + return; + + /* + * We should be at the end of the DOCTYPE declaration. + */ + if (RAW != '>') { + ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; +} + +/** + * xmlParseInternalSubset: + * @ctxt: an XML parser context + * + * parse the internal subset declaration + * + * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + */ + +static void +xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { + /* + * Is there any DTD definition ? + */ + if (RAW == '[') { + ctxt->instate = XML_PARSER_DTD; + NEXT; + /* + * Parse the succession of Markup declarations and + * PEReferences. 
+ * Subsequence (markupdecl | PEReference | S)* + */ + while (RAW != ']') { + const xmlChar *check = CUR_PTR; + int cons = ctxt->input->consumed; + + SKIP_BLANKS; + xmlParseMarkupDecl(ctxt); + xmlParsePEReference(ctxt); + + /* + * Pop-up of finished entities. + */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + + if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseInternalSubset: error detected in Markup declaration\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + } + } + if (RAW == ']') { + NEXT; + SKIP_BLANKS; + } + } + + /* + * We should be at the end of the DOCTYPE declaration. + */ + if (RAW != '>') { + ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + NEXT; +} + +/** + * xmlParseAttribute: + * @ctxt: an XML parser context + * @value: a xmlChar ** used to store the value of the attribute + * + * parse an attribute + * + * [41] Attribute ::= Name Eq AttValue + * + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect entity references + * to external entities. + * + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or indirectly in + * an attribute value (other than "<") must not contain a <. + * + * [ VC: Attribute Value Type ] + * The attribute must have been declared; the value must be of the type + * declared for it. + * + * [25] Eq ::= S? '=' S? + * + * With namespace: + * + * [NS 11] Attribute ::= QName Eq AttValue + * + * Also the case QName == xmlns:??? is handled independently as a namespace + * definition. 
+ * + * Returns the attribute name, and the value in *value. + */ + +xmlChar * +xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { + xmlChar *name, *val; + + *value = NULL; + GROW; + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + + /* + * read the value + */ + SKIP_BLANKS; + if (RAW == '=') { + NEXT; + SKIP_BLANKS; + val = xmlParseAttValue(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } else { + ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Specification mandate value for attribute %s\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + xmlFree(name); + return(NULL); + } + + /* + * Check that xml:lang conforms to the specification + * No more registered as an error, just generate a warning now + * since this was deprecated in XML second edition + */ + if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { + if (!xmlCheckLanguageID(val)) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Malformed value for xml:lang : %s\n", val); + } + } + + /* + * Check that xml:space conforms to the specification + */ + if (xmlStrEqual(name, BAD_CAST "xml:space")) { + if (xmlStrEqual(val, BAD_CAST "default")) + *(ctxt->space) = 0; + else if (xmlStrEqual(val, BAD_CAST "preserve")) + *(ctxt->space) = 1; + else { + ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, +"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n", + val); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + + 
*value = val; + return(name); +} + +/** + * xmlParseStartTag: + * @ctxt: an XML parser context + * + * parse a start of tag either for rule element or + * EmptyElement. In both case we don't parse the tag closing chars. + * + * [40] STag ::= '<' Name (S Attribute)* S? '>' + * + * [ WFC: Unique Att Spec ] + * No attribute name may appear more than once in the same start-tag or + * empty-element tag. + * + * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' + * + * [ WFC: Unique Att Spec ] + * No attribute name may appear more than once in the same start-tag or + * empty-element tag. + * + * With namespace: + * + * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' + * + * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' + * + * Returns the element name parsed + */ + +xmlChar * +xmlParseStartTag(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *attname; + xmlChar *attvalue; + const xmlChar **atts = NULL; + int nbatts = 0; + int maxatts = 0; + int i; + + if (RAW != '<') return(NULL); + NEXT1; + + name = xmlParseName(ctxt); + if (name == NULL) { + ctxt->errNo = XML_ERR_NAME_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStartTag: invalid element name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + + /* + * Now parse the attributes, it ends up with the ending + * + * (S Attribute)* S? + */ + SKIP_BLANKS; + GROW; + + while ((RAW != '>') && + ((RAW != '/') || (NXT(1) != '>')) && + (IS_CHAR(RAW))) { + const xmlChar *q = CUR_PTR; + int cons = ctxt->input->consumed; + + attname = xmlParseAttribute(ctxt, &attvalue); + if ((attname != NULL) && (attvalue != NULL)) { + /* + * [ WFC: Unique Att Spec ] + * No attribute name may appear more than once in the same + * start-tag or empty-element tag. 
+ */ + for (i = 0; i < nbatts;i += 2) { + if (xmlStrEqual(atts[i], attname)) { + ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Attribute %s redefined\n", + attname); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + xmlFree(attname); + xmlFree(attvalue); + goto failed; + } + } + + /* + * Add the pair to atts + */ + if (atts == NULL) { + maxatts = 10; + atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); + if (atts == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %ld byte failed\n", + maxatts * (long)sizeof(xmlChar *)); + return(NULL); + } + } else if (nbatts + 4 > maxatts) { + maxatts *= 2; + atts = (const xmlChar **) xmlRealloc((void *) atts, + maxatts * sizeof(xmlChar *)); + if (atts == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %ld byte failed\n", + maxatts * (long)sizeof(xmlChar *)); + return(NULL); + } + } + atts[nbatts++] = attname; + atts[nbatts++] = attvalue; + atts[nbatts] = NULL; + atts[nbatts + 1] = NULL; + } else { + if (attname != NULL) + xmlFree(attname); + if (attvalue != NULL) + xmlFree(attvalue); + } + +failed: + + if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) + break; + if (!IS_BLANK(RAW)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "attributes construct error\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStartTag: problem parsing attributes\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + } + GROW; + } + + /* + * SAX: Start of Element ! 
+ */ + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->startElement(ctxt->userData, name, atts); + + if (atts != NULL) { + for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); + xmlFree((void *) atts); + } + return(name); +} + +/** + * xmlParseEndTag: + * @ctxt: an XML parser context + * + * parse an end of tag + * + * [42] ETag ::= '</' Name S? '>' + * + * With namespace + * + * [NS 9] ETag ::= '</' QName S? '>' + */ + +void +xmlParseEndTag(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *oldname; + + GROW; + if ((RAW != '<') || (NXT(1) != '/')) { + ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + SKIP(2); + + name = xmlParseNameAndCompare(ctxt,ctxt->name); + + /* + * We should definitely be at the ending "S? '>'" part + */ + GROW; + SKIP_BLANKS; + if ((!IS_CHAR(RAW)) || (RAW != '>')) { + ctxt->errNo = XML_ERR_GT_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT1; + + /* + * [ WFC: Element Type Match ] + * The Name in an element's end-tag must match the element type in the + * start-tag. 
+ * + */ + if (name != (xmlChar*)1) { + ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { + if (name != NULL) { + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", + ctxt->name, name); + } else { + ctxt->sax->error(ctxt->userData, + "Ending tag error for: %s\n", ctxt->name); + } + + } + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; +#if 0 + else { + /* + * Recover in case of one missing close + */ + if ((ctxt->nameNr > 2) && + (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) { + namePop(ctxt); + spacePop(ctxt); + } + } +#endif + if (name != NULL) + xmlFree(name); + } + + /* + * SAX: End of Tag + */ + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + return; +} + +/** + * xmlParseCDSect: + * @ctxt: an XML parser context + * + * Parse escaped pure raw content. 
+ * + * [18] CDSect ::= CDStart CData CDEnd + * + * [19] CDStart ::= '<![CDATA[' + * + * [20] Data ::= (Char* - (Char* ']]>' Char*)) + * + * [21] CDEnd ::= ']]>' + */ +void +xmlParseCDSect(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int r, rl; + int s, sl; + int cur, l; + int count = 0; + + if ((NXT(0) == '<') && (NXT(1) == '!') && + (NXT(2) == '[') && (NXT(3) == 'C') && + (NXT(4) == 'D') && (NXT(5) == 'A') && + (NXT(6) == 'T') && (NXT(7) == 'A') && + (NXT(8) == '[')) { + SKIP(9); + } else + return; + + ctxt->instate = XML_PARSER_CDATA_SECTION; + r = CUR_CHAR(rl); + if (!IS_CHAR(r)) { + ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "CData section not finished\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_CONTENT; + return; + } + NEXTL(rl); + s = CUR_CHAR(sl); + if (!IS_CHAR(s)) { + ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "CData section not finished\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_CONTENT; + return; + } + NEXTL(sl); + cur = CUR_CHAR(l); + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return; + } + while (IS_CHAR(cur) && + ((r != ']') || (s != ']') || (cur != '>'))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return; + } + } + COPY_BUF(rl,buf,len,r); + r = s; + rl = sl; + s = cur; + sl = l; + count++; + if (count > 50) { + GROW; + count = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + } + buf[len] = 0; + ctxt->instate = 
XML_PARSER_CONTENT; + if (cur != '>') { + ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "CData section not finished\n%.50s\n", buf); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + xmlFree(buf); + return; + } + NEXTL(l); + + /* + * OK the buffer is to be consumed as cdata. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, buf, len); + else if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, len); + } + xmlFree(buf); +} + +/** + * xmlParseContent: + * @ctxt: an XML parser context + * + * Parse a content: + * + * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* + */ + +void +xmlParseContent(xmlParserCtxtPtr ctxt) { + GROW; + while ((RAW != 0) && + ((RAW != '<') || (NXT(1) != '/'))) { + const xmlChar *test = CUR_PTR; + int cons = ctxt->input->consumed; + const xmlChar *cur = ctxt->input->cur; + + /* + * First case : a Processing Instruction. + */ + if ((*cur == '<') && (cur[1] == '?')) { + xmlParsePI(ctxt); + } + + /* + * Second case : a CDSection + */ + else if ((*cur == '<') && (NXT(1) == '!') && + (NXT(2) == '[') && (NXT(3) == 'C') && + (NXT(4) == 'D') && (NXT(5) == 'A') && + (NXT(6) == 'T') && (NXT(7) == 'A') && + (NXT(8) == '[')) { + xmlParseCDSect(ctxt); + } + + /* + * Third case : a comment + */ + else if ((*cur == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) { + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } + + /* + * Fourth case : a sub-element. + */ + else if (*cur == '<') { + xmlParseElement(ctxt); + } + + /* + * Fifth case : a reference. If if has not been resolved, + * parsing returns it's Name, create the node + */ + + else if (*cur == '&') { + xmlParseReference(ctxt); + } + + /* + * Last case, text. Note that References are handled directly. 
+ */ + else { + xmlParseCharData(ctxt, 0); + } + + GROW; + /* + * Pop-up of finished entities. + */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + SHRINK; + + if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; + break; + } + } +} + +/** + * xmlParseElement: + * @ctxt: an XML parser context + * + * parse an XML element, this is highly recursive + * + * [39] element ::= EmptyElemTag | STag content ETag + * + * [ WFC: Element Type Match ] + * The Name in an element's end-tag must match the element type in the + * start-tag. + * + * [ VC: Element Valid ] + * An element is valid if there is a declaration matching elementdecl + * where the Name matches the element type and one of the following holds: + * - The declaration matches EMPTY and the element has no content. + * - The declaration matches children and the sequence of child elements + * belongs to the language generated by the regular expression in the + * content model, with optional white space (characters matching the + * nonterminal S) between each pair of child elements. + * - The declaration matches Mixed and the content consists of character + * data and child elements whose types match names in the content model. + * - The declaration matches ANY, and the types of any child elements have + * been declared. 
+ */ + +void +xmlParseElement(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlChar *oldname; + xmlParserNodeInfo node_info; + xmlNodePtr ret; + + /* Capture start position */ + if (ctxt->record_info) { + node_info.begin_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.begin_line = ctxt->input->line; + } + + if (ctxt->spaceNr == 0) + spacePush(ctxt, -1); + else + spacePush(ctxt, *ctxt->space); + + name = xmlParseStartTag(ctxt); + if (name == NULL) { + spacePop(ctxt); + return; + } + namePush(ctxt, name); + ret = ctxt->node; + + /* + * [ VC: Root Element Type ] + * The Name in the document type declaration must match the element + * type of the root element. + */ + if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && + ctxt->node && (ctxt->node == ctxt->myDoc->children)) + ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); + + /* + * Check for an Empty Element. + */ + if ((RAW == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + if ( ret != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ret; + xmlParserAddNodeInfo(ctxt, &node_info); + } + return; + } + if (RAW == '>') { + NEXT1; + } else { + ctxt->errNo = XML_ERR_GT_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + + /* + * end of parsing of this node. 
+ */ + nodePop(ctxt); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + + /* + * Capture end position and add node + */ + if ( ret != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ret; + xmlParserAddNodeInfo(ctxt, &node_info); + } + return; + } + + /* + * Parse the content of the element: + */ + xmlParseContent(ctxt); + if (!IS_CHAR(RAW)) { + ctxt->errNo = XML_ERR_TAG_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Premature end of data in tag %s\n", name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + + /* + * end of parsing of this node. + */ + nodePop(ctxt); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + return; + } + + /* + * parse the end of tag: '</' should be here. + */ + xmlParseEndTag(ctxt); + + /* + * Capture end position and add node + */ + if ( ret != NULL && ctxt->record_info ) { + node_info.end_pos = ctxt->input->consumed + + (CUR_PTR - ctxt->input->base); + node_info.end_line = ctxt->input->line; + node_info.node = ret; + xmlParserAddNodeInfo(ctxt, &node_info); + } +} + +/** + * xmlParseVersionNum: + * @ctxt: an XML parser context + * + * parse the XML version value. 
+ * + * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ + * + * Returns the string giving the XML version number, or NULL + */ +xmlChar * +xmlParseVersionNum(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = 10; + xmlChar cur; + + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + cur = CUR; + while (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == '_') || (cur == '.') || + (cur == ':') || (cur == '-')) { + if (len + 1 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + buf[len++] = cur; + NEXT; + cur=CUR; + } + buf[len] = 0; + return(buf); +} + +/** + * xmlParseVersionInfo: + * @ctxt: an XML parser context + * + * parse the XML version. + * + * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") + * + * [25] Eq ::= S? '=' S? + * + * Returns the version string, e.g. 
"1.0" + */ + +xmlChar * +xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { + xmlChar *version = NULL; + const xmlChar *q; + + if ((RAW == 'v') && (NXT(1) == 'e') && + (NXT(2) == 'r') && (NXT(3) == 's') && + (NXT(4) == 'i') && (NXT(5) == 'o') && + (NXT(6) == 'n')) { + SKIP(7); + SKIP_BLANKS; + if (RAW != '=') { + ctxt->errNo = XML_ERR_EQUAL_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseVersionInfo : expected '='\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + NEXT; + SKIP_BLANKS; + if (RAW == '"') { + NEXT; + q = CUR_PTR; + version = xmlParseVersionNum(ctxt); + if (RAW != '"') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "String not closed\n%.50s\n", q); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + } else if (RAW == '\''){ + NEXT; + q = CUR_PTR; + version = xmlParseVersionNum(ctxt); + if (RAW != '\'') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "String not closed\n%.50s\n", q); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + } else { + ctxt->errNo = XML_ERR_STRING_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseVersionInfo : expected ' or \"\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + return(version); +} + +/** + * xmlParseEncName: + * @ctxt: an XML parser context + * + * parse the XML encoding name + * + * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* + * + * Returns the encoding name value or NULL + */ +xmlChar * +xmlParseEncName(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = 10; + xmlChar cur; + + cur = CUR; + if (((cur >= 'a') && (cur 
<= 'z')) || + ((cur >= 'A') && (cur <= 'Z'))) { + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + + buf[len++] = cur; + NEXT; + cur = CUR; + while (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == '.') || (cur == '_') || + (cur == '-')) { + if (len + 1 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + buf[len++] = cur; + NEXT; + cur = CUR; + if (cur == 0) { + SHRINK; + GROW; + cur = CUR; + } + } + buf[len] = 0; + } else { + ctxt->errNo = XML_ERR_ENCODING_NAME; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + return(buf); +} + +/** + * xmlParseEncodingDecl: + * @ctxt: an XML parser context + * + * parse the XML encoding declaration + * + * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") + * + * this setups the conversion filters. 
+ * + * Returns the encoding value or NULL + */ + +xmlChar * +xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { + xmlChar *encoding = NULL; + const xmlChar *q; + + SKIP_BLANKS; + if ((RAW == 'e') && (NXT(1) == 'n') && + (NXT(2) == 'c') && (NXT(3) == 'o') && + (NXT(4) == 'd') && (NXT(5) == 'i') && + (NXT(6) == 'n') && (NXT(7) == 'g')) { + SKIP(8); + SKIP_BLANKS; + if (RAW != '=') { + ctxt->errNo = XML_ERR_EQUAL_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseEncodingDecl : expected '='\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(NULL); + } + NEXT; + SKIP_BLANKS; + if (RAW == '"') { + NEXT; + q = CUR_PTR; + encoding = xmlParseEncName(ctxt); + if (RAW != '"') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "String not closed\n%.50s\n", q); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + } else if (RAW == '\''){ + NEXT; + q = CUR_PTR; + encoding = xmlParseEncName(ctxt); + if (RAW != '\'') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "String not closed\n%.50s\n", q); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + } else { + ctxt->errNo = XML_ERR_STRING_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseEncodingDecl : expected ' or \"\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (encoding != NULL) { + xmlCharEncoding enc; + xmlCharEncodingHandlerPtr handler; + + if (ctxt->input->encoding != NULL) + xmlFree((xmlChar *) ctxt->input->encoding); + ctxt->input->encoding = encoding; + + enc = xmlParseCharEncoding((const char *) encoding); + /* + * registered set of known encodings + */ + if (enc != 
XML_CHAR_ENCODING_ERROR) { + xmlSwitchEncoding(ctxt, enc); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + ctxt->input->encoding = NULL; + xmlFree(encoding); + return(NULL); + } + } else { + /* + * fallback for unknown encodings + */ + handler = xmlFindCharEncodingHandler((const char *) encoding); + if (handler != NULL) { + xmlSwitchToEncoding(ctxt, handler); + } else { + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Unsupported encoding %s\n", encoding); + return(NULL); + } + } + } + } + return(encoding); +} + +/** + * xmlParseSDDecl: + * @ctxt: an XML parser context + * + * parse the XML standalone declaration + * + * [32] SDDecl ::= S 'standalone' Eq + * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) + * + * [ VC: Standalone Document Declaration ] + * TODO The standalone document declaration must have the value "no" + * if any external markup declarations contain declarations of: + * - attributes with default values, if elements to which these + * attributes apply appear in the document without specifications + * of values for these attributes, or + * - entities (other than amp, lt, gt, apos, quot), if references + * to those entities appear in the document, or + * - attributes with values subject to normalization, where the + * attribute appears in the document with a value which will change + * as a result of normalization, or + * - element types with element content, if white space occurs directly + * within any instance of those types. 
+ * + * Returns 1 if standalone, 0 otherwise + */ + +int +xmlParseSDDecl(xmlParserCtxtPtr ctxt) { + int standalone = -1; + + SKIP_BLANKS; + if ((RAW == 's') && (NXT(1) == 't') && + (NXT(2) == 'a') && (NXT(3) == 'n') && + (NXT(4) == 'd') && (NXT(5) == 'a') && + (NXT(6) == 'l') && (NXT(7) == 'o') && + (NXT(8) == 'n') && (NXT(9) == 'e')) { + SKIP(10); + SKIP_BLANKS; + if (RAW != '=') { + ctxt->errNo = XML_ERR_EQUAL_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML standalone declaration : expected '='\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return(standalone); + } + NEXT; + SKIP_BLANKS; + if (RAW == '\''){ + NEXT; + if ((RAW == 'n') && (NXT(1) == 'o')) { + standalone = 0; + SKIP(2); + } else if ((RAW == 'y') && (NXT(1) == 'e') && + (NXT(2) == 's')) { + standalone = 1; + SKIP(3); + } else { + ctxt->errNo = XML_ERR_STANDALONE_VALUE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "standalone accepts only 'yes' or 'no'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (RAW != '\'') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "String not closed\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + } else if (RAW == '"'){ + NEXT; + if ((RAW == 'n') && (NXT(1) == 'o')) { + standalone = 0; + SKIP(2); + } else if ((RAW == 'y') && (NXT(1) == 'e') && + (NXT(2) == 's')) { + standalone = 1; + SKIP(3); + } else { + ctxt->errNo = XML_ERR_STANDALONE_VALUE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "standalone accepts only 'yes' or 'no'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (RAW != '"') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "String not closed\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else + NEXT; + } else { + ctxt->errNo = XML_ERR_STRING_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Standalone value not found\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + return(standalone); +} + +/** + * xmlParseXMLDecl: + * @ctxt: an XML parser context + * + * parse an XML declaration header + * + * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' + */ + +void +xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { + xmlChar *version; + + /* + * We know that '<?xml' is here. + */ + SKIP(5); + + if (!IS_BLANK(RAW)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + + /* + * We must have the VersionInfo here. 
+ */ + version = xmlParseVersionInfo(ctxt); + if (version == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Malformed declaration expecting version\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { + /* + * TODO: Blueberry should be detected here + */ + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n", + version); + } + if (ctxt->version != NULL) + xmlFree((void *) ctxt->version); + ctxt->version = version; + } + + /* + * We may have the encoding declaration + */ + if (!IS_BLANK(RAW)) { + if ((RAW == '?') && (NXT(1) == '>')) { + SKIP(2); + return; + } + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Blank needed here\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + xmlParseEncodingDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + return; + } + + /* + * We may have the standalone status. + */ + if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) { + if ((RAW == '?') && (NXT(1) == '>')) { + SKIP(2); + return; + } + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Blank needed here\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + SKIP_BLANKS; + ctxt->input->standalone = xmlParseSDDecl(ctxt); + + SKIP_BLANKS; + if ((RAW == '?') && (NXT(1) == '>')) { + SKIP(2); + } else if (RAW == '>') { + /* Deprecated old WD ... 
*/ + ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "XML declaration must end-up with '?>'\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + NEXT; + } else { + ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "parsing XML declaration: '?>' expected\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + MOVETO_ENDTAG(CUR_PTR); + NEXT; + } +} + +/** + * xmlParseMisc: + * @ctxt: an XML parser context + * + * parse an XML Misc* optional field. + * + * [27] Misc ::= Comment | PI | S + */ + +void +xmlParseMisc(xmlParserCtxtPtr ctxt) { + while (((RAW == '<') && (NXT(1) == '?')) || + ((RAW == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) || + IS_BLANK(CUR)) { + if ((RAW == '<') && (NXT(1) == '?')) { + xmlParsePI(ctxt); + } else if (IS_BLANK(CUR)) { + NEXT; + } else + xmlParseComment(ctxt); + } +} + +/** + * xmlParseDocument: + * @ctxt: an XML parser context + * + * parse an XML document (and build a tree if using the standard SAX + * interface). + * + * [1] document ::= prolog element Misc* + * + * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? + * + * Returns 0, -1 in case of error. the parser context is augmented + * as a result of the parsing. + */ + +int +xmlParseDocument(xmlParserCtxtPtr ctxt) { + xmlChar start[4]; + xmlCharEncoding enc; + + xmlInitParser(); + + GROW; + + /* + * SAX: beginning of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + + if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) { + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. 
+ */ + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + } + + + if (CUR == 0) { + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Document is empty\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * Check for the XMLDecl in the Prolog. + */ + GROW; + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + + /* + * Note that we will switch encoding on the fly. + */ + xmlParseXMLDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + return(-1); + } + ctxt->standalone = ctxt->input->standalone; + SKIP_BLANKS; + } else { + ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); + } + if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + + /* + * The Misc part of the Prolog + */ + GROW; + xmlParseMisc(ctxt); + + /* + * Then possibly doc type declaration(s) and more Misc + * (doctypedecl Misc*)? + */ + GROW; + if ((RAW == '<') && (NXT(1) == '!') && + (NXT(2) == 'D') && (NXT(3) == 'O') && + (NXT(4) == 'C') && (NXT(5) == 'T') && + (NXT(6) == 'Y') && (NXT(7) == 'P') && + (NXT(8) == 'E')) { + + ctxt->inSubset = 1; + xmlParseDocTypeDecl(ctxt); + if (RAW == '[') { + ctxt->instate = XML_PARSER_DTD; + xmlParseInternalSubset(ctxt); + } + + /* + * Create and update the external subset. 
+ */ + ctxt->inSubset = 2; + if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, + ctxt->extSubSystem, ctxt->extSubURI); + ctxt->inSubset = 0; + + + ctxt->instate = XML_PARSER_PROLOG; + xmlParseMisc(ctxt); + } + + /* + * Time to start parsing the tree itself + */ + GROW; + if (RAW != '<') { + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Start tag expected, '<' not found\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; + } else { + ctxt->instate = XML_PARSER_CONTENT; + xmlParseElement(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + + + /* + * The Misc part at the end + */ + xmlParseMisc(ctxt); + + if (RAW != 0) { + ctxt->errNo = XML_ERR_DOCUMENT_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + ctxt->instate = XML_PARSER_EOF; + } + + /* + * SAX: end of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + + /* + * Remove locally kept entity definitions if the tree was not built + */ + if ((ctxt->myDoc != NULL) && + (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + + if (! ctxt->wellFormed) { + ctxt->valid = 0; + return(-1); + } + return(0); +} + +/** + * xmlParseExtParsedEnt: + * @ctxt: an XML parser context + * + * parse a general parsed entity + * An external general parsed entity is well-formed if it matches the + * production labeled extParsedEnt. + * + * [78] extParsedEnt ::= TextDecl? content + * + * Returns 0, -1 in case of error. the parser context is augmented + * as a result of the parsing. 
+ */ + +int +xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { + xmlChar start[4]; + xmlCharEncoding enc; + + xmlDefaultSAXHandlerInit(); + + GROW; + + /* + * SAX: beginning of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. + */ + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + + + if (CUR == 0) { + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Document is empty\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * Check for the XMLDecl in the Prolog. + */ + GROW; + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + + /* + * Note that we will switch encoding on the fly. 
+ */ + xmlParseXMLDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + return(-1); + } + SKIP_BLANKS; + } else { + ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); + } + if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + + /* + * Doing validity checking on chunk doesn't make sense + */ + ctxt->instate = XML_PARSER_CONTENT; + ctxt->validate = 0; + ctxt->loadsubset = 0; + ctxt->depth = 0; + + xmlParseContent(ctxt); + + if ((RAW == '<') && (NXT(1) == '/')) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (RAW != 0) { + ctxt->errNo = XML_ERR_EXTRA_CONTENT; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "extra content at the end of well balanced chunk\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + /* + * SAX: end of the document processing. + */ + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + + if (! ctxt->wellFormed) return(-1); + return(0); +} + +/************************************************************************ + * * + * Progressive parsing interfaces * + * * + ************************************************************************/ + +/** + * xmlParseLookupSequence: + * @ctxt: an XML parser context + * @first: the first char to lookup + * @next: the next char to lookup or zero + * @third: the next char to lookup or zero + * + * Try to find if a sequence (first, next, third) or just (first next) or + * (first) is available in the input stream. 
+ * This function has a side effect of (possibly) incrementing ctxt->checkIndex + * to avoid rescanning sequences of bytes, it DOES change the state of the + * parser, do not use liberally. + * + * Returns the index to the current parsing point if the full sequence + * is available, -1 otherwise. + */ +static int +xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, + xmlChar next, xmlChar third) { + int base, len; + xmlParserInputPtr in; + const xmlChar *buf; + + in = ctxt->input; + if (in == NULL) return(-1); + base = in->cur - in->base; + if (base < 0) return(-1); + if (ctxt->checkIndex > base) + base = ctxt->checkIndex; + if (in->buf == NULL) { + buf = in->base; + len = in->length; + } else { + buf = in->buf->buffer->content; + len = in->buf->buffer->use; + } + /* take into account the sequence length */ + if (third) len -= 2; + else if (next) len --; + for (;base < len;base++) { + if (buf[base] == first) { + if (third != 0) { + if ((buf[base + 1] != next) || + (buf[base + 2] != third)) continue; + } else if (next != 0) { + if (buf[base + 1] != next) continue; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "PP: lookup '%c' found at %d\n", + first, base); + else if (third == 0) + xmlGenericError(xmlGenericErrorContext, + "PP: lookup '%c%c' found at %d\n", + first, next, base); + else + xmlGenericError(xmlGenericErrorContext, + "PP: lookup '%c%c%c' found at %d\n", + first, next, third, base); +#endif + return(base - (in->cur - in->base)); + } + } + ctxt->checkIndex = base; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "PP: lookup '%c' failed\n", first); + else if (third == 0) + xmlGenericError(xmlGenericErrorContext, + "PP: lookup '%c%c' failed\n", first, next); + else + xmlGenericError(xmlGenericErrorContext, + "PP: lookup '%c%c%c' failed\n", first, next, third); +#endif + return(-1); +} + +/** + * xmlParseTryOrFinish: + * @ctxt: an XML parser context + * 
@terminate: last chunk indicator + * + * Try to progress on parsing + * + * Returns zero if no parsing was possible + */ +static int +xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { + int ret = 0; + int avail; + xmlChar cur, next; + +#ifdef DEBUG_PUSH + switch (ctxt->instate) { + case XML_PARSER_EOF: + xmlGenericError(xmlGenericErrorContext, + "PP: try EOF\n"); break; + case XML_PARSER_START: + xmlGenericError(xmlGenericErrorContext, + "PP: try START\n"); break; + case XML_PARSER_MISC: + xmlGenericError(xmlGenericErrorContext, + "PP: try MISC\n");break; + case XML_PARSER_COMMENT: + xmlGenericError(xmlGenericErrorContext, + "PP: try COMMENT\n");break; + case XML_PARSER_PROLOG: + xmlGenericError(xmlGenericErrorContext, + "PP: try PROLOG\n");break; + case XML_PARSER_START_TAG: + xmlGenericError(xmlGenericErrorContext, + "PP: try START_TAG\n");break; + case XML_PARSER_CONTENT: + xmlGenericError(xmlGenericErrorContext, + "PP: try CONTENT\n");break; + case XML_PARSER_CDATA_SECTION: + xmlGenericError(xmlGenericErrorContext, + "PP: try CDATA_SECTION\n");break; + case XML_PARSER_END_TAG: + xmlGenericError(xmlGenericErrorContext, + "PP: try END_TAG\n");break; + case XML_PARSER_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, + "PP: try ENTITY_DECL\n");break; + case XML_PARSER_ENTITY_VALUE: + xmlGenericError(xmlGenericErrorContext, + "PP: try ENTITY_VALUE\n");break; + case XML_PARSER_ATTRIBUTE_VALUE: + xmlGenericError(xmlGenericErrorContext, + "PP: try ATTRIBUTE_VALUE\n");break; + case XML_PARSER_DTD: + xmlGenericError(xmlGenericErrorContext, + "PP: try DTD\n");break; + case XML_PARSER_EPILOG: + xmlGenericError(xmlGenericErrorContext, + "PP: try EPILOG\n");break; + case XML_PARSER_PI: + xmlGenericError(xmlGenericErrorContext, + "PP: try PI\n");break; + case XML_PARSER_IGNORE: + xmlGenericError(xmlGenericErrorContext, + "PP: try IGNORE\n");break; + } +#endif + + while (1) { + SHRINK; + + /* + * Pop-up of finished entities. 
+ */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + + if (ctxt->input ==NULL) break; + if (ctxt->input->buf == NULL) + avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); + else { + /* + * If we are operating on converted input, try to flush + * remainng chars to avoid them stalling in the non-converted + * buffer. + */ + if ((ctxt->input->buf->raw != NULL) && + (ctxt->input->buf->raw->use > 0)) { + int base = ctxt->input->base - + ctxt->input->buf->buffer->content; + int current = ctxt->input->cur - ctxt->input->base; + + xmlParserInputBufferPush(ctxt->input->buf, 0, ""); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + current; + ctxt->input->end = + &ctxt->input->buf->buffer->content[ + ctxt->input->buf->buffer->use]; + } + avail = ctxt->input->buf->buffer->use - + (ctxt->input->cur - ctxt->input->base); + } + if (avail < 1) + goto done; + switch (ctxt->instate) { + case XML_PARSER_EOF: + /* + * Document parsing is done ! + */ + goto done; + case XML_PARSER_START: + if (ctxt->charset == XML_CHAR_ENCODING_NONE) { + xmlChar start[4]; + xmlCharEncoding enc; + + /* + * Very first chars read from the document flow. + */ + if (avail < 4) + goto done; + + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. 
+ */ + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + break; + } + + cur = ctxt->input->cur[0]; + next = ctxt->input->cur[1]; + if (cur == 0) { + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Document is empty\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + if ((cur == '<') && (next == '?')) { + /* PI or XML decl */ + if (avail < 5) return(ret); + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + return(ret); + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + if ((ctxt->input->cur[2] == 'x') && + (ctxt->input->cur[3] == 'm') && + (ctxt->input->cur[4] == 'l') && + (IS_BLANK(ctxt->input->cur[5]))) { + ret += 5; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing XML Decl\n"); +#endif + xmlParseXMLDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right + * here + */ + ctxt->instate = XML_PARSER_EOF; + return(0); + } + ctxt->standalone = ctxt->input->standalone; + if ((ctxt->encoding == NULL) && + (ctxt->input->encoding != NULL)) + ctxt->encoding = xmlStrdup(ctxt->input->encoding); + if ((ctxt->sax) && (ctxt->sax->startDocument) && + (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + ctxt->instate = XML_PARSER_MISC; +#ifdef DEBUG_PUSH + 
xmlGenericError(xmlGenericErrorContext, + "PP: entering MISC\n"); +#endif + } else { + ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); + if ((ctxt->sax) && (ctxt->sax->startDocument) && + (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + ctxt->instate = XML_PARSER_MISC; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering MISC\n"); +#endif + } + } else { + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); + if ((ctxt->sax) && (ctxt->sax->startDocument) && + (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + ctxt->instate = XML_PARSER_MISC; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering MISC\n"); +#endif + } + break; + case XML_PARSER_MISC: + SKIP_BLANKS; + if (ctxt->input->buf == NULL) + avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); + else + avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + if (avail < 2) + goto done; + cur = ctxt->input->cur[0]; + next = ctxt->input->cur[1]; + if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next == '!') && + (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_MISC; + } else if ((cur == '<') && (next == '!') && + (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && + (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && + (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 
'P') && + (ctxt->input->cur[8] == 'E')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing internal subset\n"); +#endif + ctxt->inSubset = 1; + xmlParseDocTypeDecl(ctxt); + if (RAW == '[') { + ctxt->instate = XML_PARSER_DTD; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering DTD\n"); +#endif + } else { + /* + * Create and update the external subset. + */ + ctxt->inSubset = 2; + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->externalSubset != NULL)) + ctxt->sax->externalSubset(ctxt->userData, + ctxt->intSubName, ctxt->extSubSystem, + ctxt->extSubURI); + ctxt->inSubset = 0; + ctxt->instate = XML_PARSER_PROLOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering PROLOG\n"); +#endif + } + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + } + break; + case XML_PARSER_IGNORE: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == IGNORE"); + ctxt->instate = XML_PARSER_DTD; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering DTD\n"); +#endif + break; + case XML_PARSER_PROLOG: + SKIP_BLANKS; + if (ctxt->input->buf == NULL) + avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); + else + avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + if (avail < 2) + goto done; + cur = ctxt->input->cur[0]; + next = ctxt->input->cur[1]; + if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next == '!') && + (ctxt->input->cur[2] == '-') && 
(ctxt->input->cur[3] == '-')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + } + break; + case XML_PARSER_EPILOG: + SKIP_BLANKS; + if (ctxt->input->buf == NULL) + avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); + else + avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + if (avail < 2) + goto done; + cur = ctxt->input->cur[0]; + next = ctxt->input->cur[1]; + if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + } else if ((cur == '<') && (next == '!') && + (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->errNo = XML_ERR_DOCUMENT_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + 
ctxt->sax->endDocument(ctxt->userData); + goto done; + } + break; + case XML_PARSER_START_TAG: { + xmlChar *name, *oldname; + + if ((avail < 2) && (ctxt->inputNr == 1)) + goto done; + cur = ctxt->input->cur[0]; + if (cur != '<') { + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Start tag expect, '<' not found\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + if (ctxt->spaceNr == 0) + spacePush(ctxt, -1); + else + spacePush(ctxt, *ctxt->space); + name = xmlParseStartTag(ctxt); + if (name == NULL) { + spacePop(ctxt); + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + namePush(ctxt, xmlStrdup(name)); + + /* + * [ VC: Root Element Type ] + * The Name in the document type declaration must match + * the element type of the root element. + */ + if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && + ctxt->node && (ctxt->node == ctxt->myDoc->children)) + ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); + + /* + * Check for an Empty Element. 
+ */ + if ((RAW == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && + (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) + ctxt->sax->endElement(ctxt->userData, name); + xmlFree(name); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + if (ctxt->name == NULL) { + ctxt->instate = XML_PARSER_EPILOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EPILOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + } + break; + } + if (RAW == '>') { + NEXT; + } else { + ctxt->errNo = XML_ERR_GT_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + + /* + * end of parsing of this node. 
+ */ + nodePop(ctxt); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } + xmlFree(name); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + break; + } + case XML_PARSER_CONTENT: { + const xmlChar *test; + int cons; + if ((avail < 2) && (ctxt->inputNr == 1)) + goto done; + cur = ctxt->input->cur[0]; + next = ctxt->input->cur[1]; + + test = CUR_PTR; + cons = ctxt->input->consumed; + if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next == '!') && + (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && + (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && + (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && + (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && + (ctxt->input->cur[8] == '[')) { + SKIP(9); + ctxt->instate = XML_PARSER_CDATA_SECTION; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CDATA_SECTION\n"); +#endif + break; + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else if ((cur == '<') && (next == '/')) { + ctxt->instate = XML_PARSER_END_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering END_TAG\n"); +#endif + break; + } else if (cur == '<') { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + 
xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + break; + } else if (cur == '&') { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Reference\n"); +#endif + xmlParseReference(ctxt); + } else { + /* TODO Avoid the extra copy, handle directly !!! */ + /* + * Goal of the following test is: + * - minimize calls to the SAX 'character' callback + * when they are mergeable + * - handle an problem for isBlank when we only parse + * a sequence of blank chars and the next one is + * not available to check against '<' presence. + * - tries to homogenize the differences in SAX + * callbacks between the push and pull versions + * of the parser. + */ + if ((ctxt->inputNr == 1) && + (avail < XML_PARSER_BIG_BUFFER_SIZE)) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) + goto done; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing char data\n"); +#endif + xmlParseCharData(ctxt, 0); + } + /* + * Pop-up of finished entities. + */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; + break; + } + break; + } + case XML_PARSER_CDATA_SECTION: { + /* + * The Push mode need to have the SAX callback for + * cdataBlock merge back contiguous callbacks. 
+ */ + int base; + + base = xmlParseLookupSequence(ctxt, ']', ']', '>'); + if (base < 0) { + if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, + XML_PARSER_BIG_BUFFER_SIZE); + } + SKIP(XML_PARSER_BIG_BUFFER_SIZE); + ctxt->checkIndex = 0; + } + goto done; + } else { + if ((ctxt->sax != NULL) && (base > 0) && + (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, + ctxt->input->cur, base); + } + SKIP(base + 3); + ctxt->checkIndex = 0; + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + } + break; + } + case XML_PARSER_END_TAG: + if (avail < 2) + goto done; + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + goto done; + xmlParseEndTag(ctxt); + if (ctxt->name == NULL) { + ctxt->instate = XML_PARSER_EPILOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EPILOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + } + break; + case XML_PARSER_DTD: { + /* + * Sorry but progressive parsing of the internal subset + * is not expected to be supported. We first check that + * the full content of the internal subset is available and + * the parsing is launched only at that point. + * Internal subset ends up with "']' S? '>'" in an unescaped + * section and not in a ']]>' sequence which are conditional + * sections (whoever argued to keep that crap in XML deserve + * a place in hell !). 
+ */ + int base, i; + xmlChar *buf; + xmlChar quote = 0; + + base = ctxt->input->cur - ctxt->input->base; + if (base < 0) return(0); + if (ctxt->checkIndex > base) + base = ctxt->checkIndex; + buf = ctxt->input->buf->buffer->content; + for (;(unsigned int) base < ctxt->input->buf->buffer->use; + base++) { + if (quote != 0) { + if (buf[base] == quote) + quote = 0; + continue; + } + if (buf[base] == '"') { + quote = '"'; + continue; + } + if (buf[base] == '\'') { + quote = '\''; + continue; + } + if (buf[base] == ']') { + if ((unsigned int) base +1 >= + ctxt->input->buf->buffer->use) + break; + if (buf[base + 1] == ']') { + /* conditional crap, skip both ']' ! */ + base++; + continue; + } + for (i = 0; + (unsigned int) base + i < ctxt->input->buf->buffer->use; + i++) { + if (buf[base + i] == '>') + goto found_end_int_subset; + } + break; + } + } + /* + * We didn't found the end of the Internal subset + */ + if (quote == 0) + ctxt->checkIndex = base; +#ifdef DEBUG_PUSH + if (next == 0) + xmlGenericError(xmlGenericErrorContext, + "PP: lookup of int subset end filed\n"); +#endif + goto done; + +found_end_int_subset: + xmlParseInternalSubset(ctxt); + ctxt->inSubset = 2; + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->externalSubset != NULL)) + ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, + ctxt->extSubSystem, ctxt->extSubURI); + ctxt->inSubset = 0; + ctxt->instate = XML_PARSER_PROLOG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering PROLOG\n"); +#endif + break; + } + case XML_PARSER_COMMENT: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == COMMENT\n"); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_PI: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == PI\n"); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + 
xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == ENTITY_DECL\n"); + ctxt->instate = XML_PARSER_DTD; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering DTD\n"); +#endif + break; + case XML_PARSER_ENTITY_VALUE: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == ENTITY_VALUE\n"); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering DTD\n"); +#endif + break; + case XML_PARSER_ATTRIBUTE_VALUE: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == ATTRIBUTE_VALUE\n"); + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + break; + case XML_PARSER_SYSTEM_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == SYSTEM_LITERAL\n"); + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + break; + case XML_PARSER_PUBLIC_LITERAL: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == PUBLIC_LITERAL\n"); + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + break; + } + } +done: +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); +#endif + return(ret); +} + +/** + * xmlParseChunk: + * @ctxt: an XML parser context + * @chunk: an char array + * @size: the size in byte of the chunk + * @terminate: last chunk indicator + * + * Parse a Chunk of memory + * + * Returns zero if no error, the xmlParserErrors otherwise. 
+ */ +int +xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, + int terminate) { + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; + + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + cur; + ctxt->input->end = + &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); +#endif + + if ((terminate) || (ctxt->input->buf->buffer->use > 80)) + xmlParseTryOrFinish(ctxt, terminate); + } else if (ctxt->instate != XML_PARSER_EOF) { + if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { + xmlParserInputBufferPtr in = ctxt->input->buf; + if ((in->encoder != NULL) && (in->buffer != NULL) && + (in->raw != NULL)) { + int nbchars; + + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParseChunk: encoder error\n"); + return(XML_ERR_INVALID_ENCODING); + } + } + } + } + xmlParseTryOrFinish(ctxt, terminate); + if (terminate) { + /* + * Check for termination + */ + int avail = 0; + if (ctxt->input->buf == NULL) + avail = ctxt->input->length - + (ctxt->input->cur - ctxt->input->base); + else + avail = ctxt->input->buf->buffer->use - + (ctxt->input->cur - ctxt->input->base); + + if ((ctxt->instate != XML_PARSER_EOF) && + (ctxt->instate != XML_PARSER_EPILOG)) { + ctxt->errNo = XML_ERR_DOCUMENT_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { + ctxt->errNo = 
XML_ERR_DOCUMENT_END; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + + } + if (ctxt->instate != XML_PARSER_EOF) { + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + ctxt->instate = XML_PARSER_EOF; + } + return((xmlParserErrors) ctxt->errNo); +} + +/************************************************************************ + * * + * I/O front end functions to the parser * + * * + ************************************************************************/ + +/** + * xmlStopParser: + * @ctxt: an XML parser context + * + * Blocks further parser processing + */ +void +xmlStopParser(xmlParserCtxtPtr ctxt) { + ctxt->instate = XML_PARSER_EOF; + if (ctxt->input != NULL) + ctxt->input->cur = BAD_CAST""; +} + +/** + * xmlCreatePushParserCtxt: + * @sax: a SAX handler + * @user_data: The user data returned on SAX callbacks + * @chunk: a pointer to an array of chars + * @size: number of chars in the array + * @filename: an optional file name or URI + * + * Create a parser context for using the XML parser in push mode. + * If @buffer and @size are non-NULL, the data is used to detect + * the encoding. The remaining characters will be parsed so they + * don't need to be fed in again through xmlParseChunk. + * To allow content encoding detection, @size should be >= 4 + * The value of @filename is used for fetching external entities + * and error/warning reports. 
+ * + * Returns the new parser context or NULL + */ + +xmlParserCtxtPtr +xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, + const char *chunk, int size, const char *filename) { + xmlParserCtxtPtr ctxt; + xmlParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; + + /* + * plug some encoding conversion routines + */ + if ((chunk != NULL) && (size >= 4)) + enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); + + buf = xmlAllocParserInputBuffer(enc); + if (buf == NULL) return(NULL); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + xmlFree(buf); + return(NULL); + } + if (sax != NULL) { + if (ctxt->sax != &xmlDefaultSAXHandler) + xmlFree(ctxt->sax); + ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); + if (ctxt->sax == NULL) { + xmlFree(buf); + xmlFree(ctxt); + return(NULL); + } + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); + if (user_data != NULL) + ctxt->userData = user_data; + } + if (filename == NULL) { + ctxt->directory = NULL; + } else { + ctxt->directory = xmlParserGetDirectory(filename); + } + + inputStream = xmlNewInputStream(ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + if (filename == NULL) + inputStream->filename = NULL; + else + inputStream->filename = (char *) + xmlNormalizeWindowsPath((const xmlChar *) filename); + inputStream->buf = buf; + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = + &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; + + inputPush(ctxt, inputStream); + + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL)) { + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; + + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + + ctxt->input->base = ctxt->input->buf->buffer->content + base; + 
ctxt->input->cur = ctxt->input->base + cur; + ctxt->input->end = + &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); +#endif + } + + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + + return(ctxt); +} + +/** + * xmlCreateIOParserCtxt: + * @sax: a SAX handler + * @user_data: The user data returned on SAX callbacks + * @ioread: an I/O read function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @enc: the charset encoding if known + * + * Create a parser context for using the XML parser with an existing + * I/O stream + * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, + xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, + void *ioctx, xmlCharEncoding enc) { + xmlParserCtxtPtr ctxt; + xmlParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + + buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); + if (buf == NULL) return(NULL); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + xmlFree(buf); + return(NULL); + } + if (sax != NULL) { + if (ctxt->sax != &xmlDefaultSAXHandler) + xmlFree(ctxt->sax); + ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); + if (ctxt->sax == NULL) { + xmlFree(buf); + xmlFree(ctxt); + return(NULL); + } + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); + if (user_data != NULL) + ctxt->userData = user_data; + } + + inputStream = xmlNewIOInputStream(ctxt, buf, enc); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, inputStream); + + return(ctxt); +} + +/************************************************************************ + * * + * Front ends when parsing a DTD * + * * + ************************************************************************/ + +/** + * xmlIOParseDTD: + * @sax: the SAX handler block or NULL + * @input: an Input 
Buffer + * @enc: the charset encoding if known + * + * Load and parse a DTD + * + * Returns the resulting xmlDtdPtr or NULL in case of error. + * @input will be freed at parsing end. + */ + +xmlDtdPtr +xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, + xmlCharEncoding enc) { + xmlDtdPtr ret = NULL; + xmlParserCtxtPtr ctxt; + xmlParserInputPtr pinput = NULL; + xmlChar start[4]; + + if (input == NULL) + return(NULL); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + return(NULL); + } + + /* + * Set-up the SAX context + */ + if (sax != NULL) { + if (ctxt->sax != NULL) + xmlFree(ctxt->sax); + ctxt->sax = sax; + ctxt->userData = NULL; + } + + /* + * generate a parser input from the I/O handler + */ + + pinput = xmlNewIOInputStream(ctxt, input, enc); + if (pinput == NULL) { + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + /* + * plug some encoding conversion routines here. + */ + xmlPushInput(ctxt, pinput); + + pinput->filename = NULL; + pinput->line = 1; + pinput->col = 1; + pinput->base = ctxt->input->cur; + pinput->cur = ctxt->input->cur; + pinput->free = NULL; + + /* + * let's parse that entity knowing it's an external subset. + */ + ctxt->inSubset = 2; + ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", + BAD_CAST "none", BAD_CAST "none"); + + if (enc == XML_CHAR_ENCODING_NONE) { + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. 
+ */ + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + } + + xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); + + if (ctxt->myDoc != NULL) { + if (ctxt->wellFormed) { + ret = ctxt->myDoc->extSubset; + ctxt->myDoc->extSubset = NULL; + } else { + ret = NULL; + } + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlSAXParseDTD: + * @sax: the SAX handler block + * @ExternalID: a NAME* containing the External ID of the DTD + * @SystemID: a NAME* containing the URL to the DTD + * + * Load and parse an external subset. + * + * Returns the resulting xmlDtdPtr or NULL in case of error. + */ + +xmlDtdPtr +xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, + const xmlChar *SystemID) { + xmlDtdPtr ret = NULL; + xmlParserCtxtPtr ctxt; + xmlParserInputPtr input = NULL; + xmlCharEncoding enc; + + if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + return(NULL); + } + + /* + * Set-up the SAX context + */ + if (sax != NULL) { + if (ctxt->sax != NULL) + xmlFree(ctxt->sax); + ctxt->sax = sax; + ctxt->userData = NULL; + } + + /* + * Ask the Entity resolver to load the damn thing + */ + + if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) + input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); + if (input == NULL) { + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + /* + * plug some encoding conversion routines here. 
+ */ + xmlPushInput(ctxt, input); + enc = xmlDetectCharEncoding(ctxt->input->cur, 4); + xmlSwitchEncoding(ctxt, enc); + + if (input->filename == NULL) + input->filename = (char *) xmlStrdup(SystemID); + input->line = 1; + input->col = 1; + input->base = ctxt->input->cur; + input->cur = ctxt->input->cur; + input->free = NULL; + + /* + * let's parse that entity knowing it's an external subset. + */ + ctxt->inSubset = 2; + ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", + ExternalID, SystemID); + xmlParseExternalSubset(ctxt, ExternalID, SystemID); + + if (ctxt->myDoc != NULL) { + if (ctxt->wellFormed) { + ret = ctxt->myDoc->extSubset; + ctxt->myDoc->extSubset = NULL; + } else { + ret = NULL; + } + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlParseDTD: + * @ExternalID: a NAME* containing the External ID of the DTD + * @SystemID: a NAME* containing the URL to the DTD + * + * Load and parse an external subset. + * + * Returns the resulting xmlDtdPtr or NULL in case of error. + */ + +xmlDtdPtr +xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { + return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); +} + +/************************************************************************ + * * + * Front ends when parsing an Entity * + * * + ************************************************************************/ + +/** + * xmlParseCtxtExternalEntity: + * @ctx: the existing parsing context + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @lst: the return value for the set of parsed nodes + * + * Parse an external general entity within an existing parsing context + * An external general parsed entity is well-formed if it matches the + * production labeled extParsedEnt. + * + * [78] extParsedEnt ::= TextDecl? 
content + * + * Returns 0 if the entity is well formed, -1 in case of args problem and + * the parser error code otherwise + */ + +int +xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, + const xmlChar *ID, xmlNodePtr *lst) { + xmlParserCtxtPtr ctxt; + xmlDocPtr newDoc; + xmlSAXHandlerPtr oldsax = NULL; + int ret = 0; + xmlChar start[4]; + xmlCharEncoding enc; + + if (ctx->depth > 40) { + return(XML_ERR_ENTITY_LOOP); + } + + if (lst != NULL) + *lst = NULL; + if ((URL == NULL) && (ID == NULL)) + return(-1); + if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ + return(-1); + + + ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); + if (ctxt == NULL) return(-1); + ctxt->userData = ctxt; + oldsax = ctxt->sax; + ctxt->sax = ctx->sax; + newDoc = xmlNewDoc(BAD_CAST "1.0"); + if (newDoc == NULL) { + xmlFreeParserCtxt(ctxt); + return(-1); + } + if (ctx->myDoc != NULL) { + newDoc->intSubset = ctx->myDoc->intSubset; + newDoc->extSubset = ctx->myDoc->extSubset; + } + if (ctx->myDoc->URL != NULL) { + newDoc->URL = xmlStrdup(ctx->myDoc->URL); + } + newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); + if (newDoc->children == NULL) { + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + newDoc->intSubset = NULL; + newDoc->extSubset = NULL; + xmlFreeDoc(newDoc); + return(-1); + } + nodePush(ctxt, newDoc->children); + if (ctx->myDoc == NULL) { + ctxt->myDoc = newDoc; + } else { + ctxt->myDoc = ctx->myDoc; + newDoc->children->doc = ctx->myDoc; + } + + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. 
+ */ + GROW + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + + /* + * Parse a possible text declaration first + */ + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + } + + /* + * Doing validity checking on chunk doesn't make sense + */ + ctxt->instate = XML_PARSER_CONTENT; + ctxt->validate = ctx->validate; + ctxt->loadsubset = ctx->loadsubset; + ctxt->depth = ctx->depth + 1; + ctxt->replaceEntities = ctx->replaceEntities; + if (ctxt->validate) { + ctxt->vctxt.error = ctx->vctxt.error; + ctxt->vctxt.warning = ctx->vctxt.warning; + } else { + ctxt->vctxt.error = NULL; + ctxt->vctxt.warning = NULL; + } + ctxt->vctxt.nodeTab = NULL; + ctxt->vctxt.nodeNr = 0; + ctxt->vctxt.nodeMax = 0; + ctxt->vctxt.node = NULL; + + xmlParseContent(ctxt); + + if ((RAW == '<') && (NXT(1) == '/')) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (RAW != 0) { + ctxt->errNo = XML_ERR_EXTRA_CONTENT; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "extra content at the end of well balanced chunk\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (ctxt->node != newDoc->children) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + if (!ctxt->wellFormed) { + if (ctxt->errNo == 0) + ret = 1; + else + ret = ctxt->errNo; + } else { + if (lst != NULL) { + xmlNodePtr cur; 
+ + /* + * Return the newly created nodeset after unlinking it from + * they pseudo parent. + */ + cur = newDoc->children->children; + *lst = cur; + while (cur != NULL) { + cur->parent = NULL; + cur = cur->next; + } + newDoc->children->children = NULL; + } + ret = 0; + } + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + newDoc->intSubset = NULL; + newDoc->extSubset = NULL; + xmlFreeDoc(newDoc); + + return(ret); +} + +/** + * xmlParseExternalEntityPrivate: + * @doc: the document the chunk pertains to + * @oldctxt: the previous parser context if available + * @sax: the SAX handler bloc (possibly NULL) + * @user_data: The user data returned on SAX callbacks (possibly NULL) + * @depth: Used for loop detection, use 0 + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @list: the return value for the set of parsed nodes + * + * Private version of xmlParseExternalEntity() + * + * Returns 0 if the entity is well formed, -1 in case of args problem and + * the parser error code otherwise + */ + +static int +xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, + xmlSAXHandlerPtr sax, + void *user_data, int depth, const xmlChar *URL, + const xmlChar *ID, xmlNodePtr *list) { + xmlParserCtxtPtr ctxt; + xmlDocPtr newDoc; + xmlSAXHandlerPtr oldsax = NULL; + int ret = 0; + xmlChar start[4]; + xmlCharEncoding enc; + + if (depth > 40) { + return(XML_ERR_ENTITY_LOOP); + } + + + + if (list != NULL) + *list = NULL; + if ((URL == NULL) && (ID == NULL)) + return(-1); + if (doc == NULL) /* @@ relax but check for dereferences */ + return(-1); + + + ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); + if (ctxt == NULL) return(-1); + ctxt->userData = ctxt; + if (oldctxt != NULL) { + ctxt->_private = oldctxt->_private; + ctxt->loadsubset = oldctxt->loadsubset; + ctxt->validate = oldctxt->validate; + ctxt->external = oldctxt->external; + } else { + /* + * Doing validity checking on chunk without context + * doesn't make sense + */ + 
ctxt->_private = NULL; + ctxt->validate = 0; + ctxt->external = 2; + ctxt->loadsubset = 0; + } + if (sax != NULL) { + oldsax = ctxt->sax; + ctxt->sax = sax; + if (user_data != NULL) + ctxt->userData = user_data; + } + newDoc = xmlNewDoc(BAD_CAST "1.0"); + if (newDoc == NULL) { + xmlFreeParserCtxt(ctxt); + return(-1); + } + if (doc != NULL) { + newDoc->intSubset = doc->intSubset; + newDoc->extSubset = doc->extSubset; + } + if (doc->URL != NULL) { + newDoc->URL = xmlStrdup(doc->URL); + } + newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); + if (newDoc->children == NULL) { + if (sax != NULL) + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + newDoc->intSubset = NULL; + newDoc->extSubset = NULL; + xmlFreeDoc(newDoc); + return(-1); + } + nodePush(ctxt, newDoc->children); + if (doc == NULL) { + ctxt->myDoc = newDoc; + } else { + ctxt->myDoc = doc; + newDoc->children->doc = doc; + } + + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. 
+ */ + GROW; + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + + /* + * Parse a possible text declaration first + */ + if ((RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + } + + ctxt->instate = XML_PARSER_CONTENT; + ctxt->depth = depth; + + xmlParseContent(ctxt); + + if ((RAW == '<') && (NXT(1) == '/')) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (RAW != 0) { + ctxt->errNo = XML_ERR_EXTRA_CONTENT; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "extra content at the end of well balanced chunk\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (ctxt->node != newDoc->children) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + if (!ctxt->wellFormed) { + if (ctxt->errNo == 0) + ret = 1; + else + ret = ctxt->errNo; + } else { + if (list != NULL) { + xmlNodePtr cur; + + /* + * Return the newly created nodeset after unlinking it from + * they pseudo parent. 
+ */ + cur = newDoc->children->children; + *list = cur; + while (cur != NULL) { + cur->parent = NULL; + cur = cur->next; + } + newDoc->children->children = NULL; + } + ret = 0; + } + if (sax != NULL) + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + newDoc->intSubset = NULL; + newDoc->extSubset = NULL; + xmlFreeDoc(newDoc); + + return(ret); +} + +/** + * xmlParseExternalEntity: + * @doc: the document the chunk pertains to + * @sax: the SAX handler bloc (possibly NULL) + * @user_data: The user data returned on SAX callbacks (possibly NULL) + * @depth: Used for loop detection, use 0 + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @lst: the return value for the set of parsed nodes + * + * Parse an external general entity + * An external general parsed entity is well-formed if it matches the + * production labeled extParsedEnt. + * + * [78] extParsedEnt ::= TextDecl? content + * + * Returns 0 if the entity is well formed, -1 in case of args problem and + * the parser error code otherwise + */ + +int +xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, + int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { + return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, + ID, lst)); +} + +/** + * xmlParseBalancedChunkMemory: + * @doc: the document the chunk pertains to + * @sax: the SAX handler bloc (possibly NULL) + * @user_data: The user data returned on SAX callbacks (possibly NULL) + * @depth: Used for loop detection, use 0 + * @string: the input string in UTF8 or ISO-Latin (zero terminated) + * @lst: the return value for the set of parsed nodes + * + * Parse a well-balanced chunk of an XML document + * called by the parser + * The allowed sequence for the Well Balanced Chunk is the one defined by + * the content production in the XML grammar: + * + * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* + * + * Returns 0 if the chunk is well 
balanced, -1 in case of args problem and
 *    the parser error code otherwise
 */

int
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
    int status;

    /*
     * Strict front end: forward every argument to the recovering variant
     * with the recover flag forced to 0.
     */
    status = xmlParseBalancedChunkMemoryRecover(doc, sax, user_data,
                                                depth, string, lst, 0);
    return(status);
}

/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 *
 * In case recover is set to 1, the nodelist will not be empty even if
 * the parsed chunk is not well balanced.
+ */ +static int +xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, + const xmlChar *string, void *user_data, xmlNodePtr *lst) { + xmlParserCtxtPtr ctxt; + xmlDocPtr newDoc = NULL; + xmlSAXHandlerPtr oldsax = NULL; + xmlNodePtr content = NULL; + int size; + int ret = 0; + + if (oldctxt->depth > 40) { + return(XML_ERR_ENTITY_LOOP); + } + + + if (lst != NULL) + *lst = NULL; + if (string == NULL) + return(-1); + + size = xmlStrlen(string); + + ctxt = xmlCreateMemoryParserCtxt((char *) string, size); + if (ctxt == NULL) return(-1); + if (user_data != NULL) + ctxt->userData = user_data; + else + ctxt->userData = ctxt; + + oldsax = ctxt->sax; + ctxt->sax = oldctxt->sax; + ctxt->_private = oldctxt->_private; + if (oldctxt->myDoc == NULL) { + newDoc = xmlNewDoc(BAD_CAST "1.0"); + if (newDoc == NULL) { + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + return(-1); + } + ctxt->myDoc = newDoc; + } else { + ctxt->myDoc = oldctxt->myDoc; + content = ctxt->myDoc->children; + } + ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL, + BAD_CAST "pseudoroot", NULL); + if (ctxt->myDoc->children == NULL) { + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + if (newDoc != NULL) + xmlFreeDoc(newDoc); + return(-1); + } + nodePush(ctxt, ctxt->myDoc->children); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->depth = oldctxt->depth + 1; + + /* + * Doing validity checking on chunk doesn't make sense + */ + ctxt->validate = 0; + ctxt->loadsubset = oldctxt->loadsubset; + + xmlParseContent(ctxt); + if ((RAW == '<') && (NXT(1) == '/')) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else if (RAW != 0) { + ctxt->errNo = XML_ERR_EXTRA_CONTENT; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "extra content at the end of well balanced 
chunk\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + if (ctxt->node != ctxt->myDoc->children) { + ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "chunk is not well balanced\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + + if (!ctxt->wellFormed) { + if (ctxt->errNo == 0) + ret = 1; + else + ret = ctxt->errNo; + } else { + ret = 0; + } + + if ((lst != NULL) && (ret == 0)) { + xmlNodePtr cur; + + /* + * Return the newly created nodeset after unlinking it from + * they pseudo parent. + */ + cur = ctxt->myDoc->children->children; + *lst = cur; + while (cur != NULL) { + cur->parent = NULL; + cur = cur->next; + } + ctxt->myDoc->children->children = NULL; + } + if (ctxt->myDoc != NULL) { + xmlFreeNode(ctxt->myDoc->children); + ctxt->myDoc->children = content; + } + + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + if (newDoc != NULL) + xmlFreeDoc(newDoc); + + return(ret); +} + +/** + * xmlParseBalancedChunkMemoryRecover: + * @doc: the document the chunk pertains to + * @sax: the SAX handler bloc (possibly NULL) + * @user_data: The user data returned on SAX callbacks (possibly NULL) + * @depth: Used for loop detection, use 0 + * @string: the input string in UTF8 or ISO-Latin (zero terminated) + * @lst: the return value for the set of parsed nodes + * @recover: return nodes even if the data is broken (use 0) + * + * + * Parse a well-balanced chunk of an XML document + * called by the parser + * The allowed sequence for the Well Balanced Chunk is the one defined by + * the content production in the XML grammar: + * + * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* + * + * Returns 0 if the chunk is well balanced, -1 in case of args problem and + * the parser error code otherwise + * + * In case recover is set to 1, the nodelist will not be empty even if + * the parsed chunk is not well 
balanced.
 */
int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
     int recover) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content;
    int size;
    int ret = 0;

    /* Nesting guard: refuse to go deeper than 40 levels. */
    if (depth > 40) {
	return(XML_ERR_ENTITY_LOOP);
    }

    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(-1);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(-1);
    ctxt->userData = ctxt;
    if (sax != NULL) {
	/* Install the caller's SAX block; the context's own one is kept
	 * in oldsax and must be put back before the context is freed. */
	oldsax = ctxt->sax;
        ctxt->sax = sax;
	if (user_data != NULL)
	    ctxt->userData = user_data;
    }
    newDoc = xmlNewDoc(BAD_CAST "1.0");
    if (newDoc == NULL) {
	/*
	 * Put the context's own SAX block back first, exactly as every
	 * other exit of this function does, so the context is never
	 * freed while it still points at the caller's handler block.
	 */
	if (sax != NULL)
	    ctxt->sax = oldsax;
	xmlFreeParserCtxt(ctxt);
	return(-1);
    }
    if (doc != NULL) {
	/* Share the DTD subsets of the reference document while parsing. */
	newDoc->intSubset = doc->intSubset;
	newDoc->extSubset = doc->extSubset;
    }
    newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newDoc->children == NULL) {
	if (sax != NULL)
	    ctxt->sax = oldsax;
	xmlFreeParserCtxt(ctxt);
	/* The subsets are borrowed from doc: detach them before freeing. */
	newDoc->intSubset = NULL;
	newDoc->extSubset = NULL;
        xmlFreeDoc(newDoc);
	return(-1);
    }
    nodePush(ctxt, newDoc->children);
    ctxt->myDoc = newDoc;
    if (doc != NULL) {
	newDoc->children->doc = doc;
    }
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = depth;

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->validate = 0;
    ctxt->loadsubset = 0;

    if ( doc != NULL ){
	/* Hide the children of doc for the duration of the parse. */
        content = doc->children;
        doc->children = NULL;
        xmlParseContent(ctxt);
        doc->children = content;
    }
    else {
        xmlParseContent(ctxt);
    }

    /* Anything left in the input means the chunk was not consumed fully. */
    if ((RAW == '<') && (NXT(1) == '/')) {
	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData,
		"chunk is not well balanced\n");
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    } else if (RAW != 0) {
	ctxt->errNo = XML_ERR_EXTRA_CONTENT;
	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData,
		"extra content at the end of well balanced chunk\n");
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    }
    /* The node stack must be back at the pseudo root. */
    if (ctxt->node != newDoc->children) {
	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
	    ctxt->sax->error(ctxt->userData,
		"chunk is not well balanced\n");
	ctxt->wellFormed = 0;
	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
	    ret = 1;
	else
	    ret = ctxt->errNo;
    } else {
	ret = 0;
    }

    if (lst != NULL && (ret == 0 || recover == 1)) {
	xmlNodePtr cur;

	/*
	 * Return the newly created nodeset after unlinking it from
	 * they pseudo parent.
	 */
	cur = newDoc->children->children;
	*lst = cur;
	while (cur != NULL) {
	    cur->parent = NULL;
	    cur = cur->next;
	}
	newDoc->children->children = NULL;
    }

    if (sax != NULL)
	ctxt->sax = oldsax;
    xmlFreeParserCtxt(ctxt);
    /* The subsets are borrowed from doc: detach them before freeing. */
    newDoc->intSubset = NULL;
    newDoc->extSubset = NULL;
    xmlFreeDoc(newDoc);

    return(ret);
}

/**
 * xmlSAXParseEntity:
 * @sax:  the SAX handler block
 * @filename:  the filename
 *
 * parse an XML external entity out of context and build a tree.
 * It use the given SAX function block to handle the parsing callback.
 * If sax is NULL, fallback to the default DOM tree building routines.
 *
 * [78] extParsedEnt ::= TextDecl?
content + * + * This correspond to a "Well Balanced" chunk + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { + xmlDocPtr ret; + xmlParserCtxtPtr ctxt; + char *directory = NULL; + + ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) { + return(NULL); + } + if (sax != NULL) { + if (ctxt->sax != NULL) + xmlFree(ctxt->sax); + ctxt->sax = sax; + ctxt->userData = NULL; + } + + if ((ctxt->directory == NULL) && (directory == NULL)) + directory = xmlParserGetDirectory(filename); + + xmlParseExtParsedEnt(ctxt); + + if (ctxt->wellFormed) + ret = ctxt->myDoc; + else { + ret = NULL; + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlParseEntity: + * @filename: the filename + * + * parse an XML external entity out of context and build a tree. + * + * [78] extParsedEnt ::= TextDecl? content + * + * This correspond to a "Well Balanced" chunk + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlParseEntity(const char *filename) { + return(xmlSAXParseEntity(NULL, filename)); +} + +/** + * xmlCreateEntityParserCtxt: + * @URL: the entity URL + * @ID: the entity PUBLIC ID + * @base: a possible base for the target URI + * + * Create a parser context for an external entity + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. 
+ * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, + const xmlChar *base) { + xmlParserCtxtPtr ctxt; + xmlParserInputPtr inputStream; + char *directory = NULL; + xmlChar *uri; + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + return(NULL); + } + + uri = xmlBuildURI(URL, base); + + if (uri == NULL) { + inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + inputPush(ctxt, inputStream); + + if ((ctxt->directory == NULL) && (directory == NULL)) + directory = xmlParserGetDirectory((char *)URL); + if ((ctxt->directory == NULL) && (directory != NULL)) + ctxt->directory = directory; + } else { + inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); + if (inputStream == NULL) { + xmlFree(uri); + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + inputPush(ctxt, inputStream); + + if ((ctxt->directory == NULL) && (directory == NULL)) + directory = xmlParserGetDirectory((char *)uri); + if ((ctxt->directory == NULL) && (directory != NULL)) + ctxt->directory = directory; + xmlFree(uri); + } + + return(ctxt); +} + +/************************************************************************ + * * + * Front ends when parsing from a file * + * * + ************************************************************************/ + +/** + * xmlCreateFileParserCtxt: + * @filename: the filename + * + * Create a parser context for a file content. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. 
+ * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreateFileParserCtxt(const char *filename) +{ + xmlParserCtxtPtr ctxt; + xmlParserInputPtr inputStream; + char *directory = NULL; + xmlChar *normalized; + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + if (xmlDefaultSAXHandler.error != NULL) { + xmlDefaultSAXHandler.error(NULL, "out of memory\n"); + } + return(NULL); + } + + normalized = xmlNormalizeWindowsPath((const xmlChar *) filename); + if (normalized == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + xmlFree(normalized); + return(NULL); + } + + inputPush(ctxt, inputStream); + if ((ctxt->directory == NULL) && (directory == NULL)) + directory = xmlParserGetDirectory((char *) normalized); + if ((ctxt->directory == NULL) && (directory != NULL)) + ctxt->directory = directory; + + xmlFree(normalized); + + return(ctxt); +} + +/** + * xmlSAXParseFileWithData: + * @sax: the SAX handler block + * @filename: the filename + * @recovery: work in recovery mode, i.e. tries to read no Well Formed + * documents + * @data: the userdata + * + * parse an XML file and build a tree. Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * It use the given SAX function block to handle the parsing callback. + * If sax is NULL, fallback to the default DOM tree building routines. 
+ * + * User data (void *) is stored within the parser context in the + * context's _private member, so it is available nearly everywhere in libxml + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, + int recovery, void *data) { + xmlDocPtr ret; + xmlParserCtxtPtr ctxt; + char *directory = NULL; + + xmlInitParser(); + + ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) { + return(NULL); + } + if (sax != NULL) { + if (ctxt->sax != NULL) + xmlFree(ctxt->sax); + ctxt->sax = sax; + } + if (data!=NULL) { + ctxt->_private=data; + } + + if ((ctxt->directory == NULL) && (directory == NULL)) + directory = xmlParserGetDirectory(filename); + if ((ctxt->directory == NULL) && (directory != NULL)) + ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); + + ctxt->recovery = recovery; + + xmlParseDocument(ctxt); + + if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; + else { + ret = NULL; + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlSAXParseFile: + * @sax: the SAX handler block + * @filename: the filename + * @recovery: work in recovery mode, i.e. tries to read no Well Formed + * documents + * + * parse an XML file and build a tree. Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * It use the given SAX function block to handle the parsing callback. + * If sax is NULL, fallback to the default DOM tree building routines. + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, + int recovery) { + return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); +} + +/** + * xmlRecoverDoc: + * @cur: a pointer to an array of xmlChar + * + * parse an XML in-memory document and build a tree. 
+ * In the case the document is not Well Formed, a tree is built anyway + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlRecoverDoc(xmlChar *cur) { + return(xmlSAXParseDoc(NULL, cur, 1)); +} + +/** + * xmlParseFile: + * @filename: the filename + * + * parse an XML file and build a tree. Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * + * Returns the resulting document tree if the file was wellformed, + * NULL otherwise. + */ + +xmlDocPtr +xmlParseFile(const char *filename) { + return(xmlSAXParseFile(NULL, filename, 0)); +} + +/** + * xmlRecoverFile: + * @filename: the filename + * + * parse an XML file and build a tree. Automatic support for ZLIB/Compress + * compressed document is provided by default if found at compile-time. + * In the case the document is not Well Formed, a tree is built anyway + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlRecoverFile(const char *filename) { + return(xmlSAXParseFile(NULL, filename, 1)); +} + + +/** + * xmlSetupParserForBuffer: + * @ctxt: an XML parser context + * @buffer: a xmlChar * buffer + * @filename: a file name + * + * Setup the parser context to parse a new buffer; Clears any prior + * contents from the parser context. 
The buffer parameter must not be + * NULL, but the filename parameter can be + */ +void +xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, + const char* filename) +{ + xmlParserInputPtr input; + + input = xmlNewInputStream(ctxt); + if (input == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc"); + xmlFree(ctxt); + return; + } + + xmlClearParserCtxt(ctxt); + if (filename != NULL) + input->filename = xmlMemStrdup(filename); + input->base = buffer; + input->cur = buffer; + input->end = &buffer[xmlStrlen(buffer)]; + inputPush(ctxt, input); +} + +/** + * xmlSAXUserParseFile: + * @sax: a SAX handler + * @user_data: The user data returned on SAX callbacks + * @filename: a file name + * + * parse an XML file and call the given SAX handler routines. + * Automatic support for ZLIB/Compress compressed document is provided + * + * Returns 0 in case of success or a error number otherwise + */ +int +xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, + const char *filename) { + int ret = 0; + xmlParserCtxtPtr ctxt; + + ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) return -1; + if (ctxt->sax != &xmlDefaultSAXHandler) + xmlFree(ctxt->sax); + ctxt->sax = sax; + if (user_data != NULL) + ctxt->userData = user_data; + + xmlParseDocument(ctxt); + + if (ctxt->wellFormed) + ret = 0; + else { + if (ctxt->errNo != 0) + ret = ctxt->errNo; + else + ret = -1; + } + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return ret; +} + +/************************************************************************ + * * + * Front ends when parsing from memory * + * * + ************************************************************************/ + +/** + * xmlCreateMemoryParserCtxt: + * @buffer: a pointer to a char array + * @size: the size of the array + * + * Create a parser context for an XML in-memory document. 
+ * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreateMemoryParserCtxt(const char *buffer, int size) { + xmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + xmlParserInputBufferPtr buf; + + if (buffer == NULL) + return(NULL); + if (size <= 0) + return(NULL); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + return(NULL); + + buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); + if (buf == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + input = xmlNewInputStream(ctxt); + if (input == NULL) { + xmlFreeParserInputBuffer(buf); + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + input->filename = NULL; + input->buf = buf; + input->base = input->buf->buffer->content; + input->cur = input->buf->buffer->content; + input->end = &input->buf->buffer->content[input->buf->buffer->use]; + + inputPush(ctxt, input); + return(ctxt); +} + +/** + * xmlSAXParseMemoryWithData: + * @sax: the SAX handler block + * @buffer: an pointer to a char array + * @size: the size of the array + * @recovery: work in recovery mode, i.e. tries to read no Well Formed + * documents + * @data: the userdata + * + * parse an XML in-memory block and use the given SAX function block + * to handle the parsing callback. If sax is NULL, fallback to the default + * DOM tree building routines. 
+ * + * User data (void *) is stored within the parser context in the + * context's _private member, so it is available nearly everywhere in libxml + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, + int size, int recovery, void *data) { + xmlDocPtr ret; + xmlParserCtxtPtr ctxt; + + ctxt = xmlCreateMemoryParserCtxt(buffer, size); + if (ctxt == NULL) return(NULL); + if (sax != NULL) { + if (ctxt->sax != NULL) + xmlFree(ctxt->sax); + ctxt->sax = sax; + } + if (data!=NULL) { + ctxt->_private=data; + } + + xmlParseDocument(ctxt); + + if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; + else { + ret = NULL; + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlSAXParseMemory: + * @sax: the SAX handler block + * @buffer: an pointer to a char array + * @size: the size of the array + * @recovery: work in recovery mode, i.e. tries to read not Well Formed + * documents + * + * parse an XML in-memory block and use the given SAX function block + * to handle the parsing callback. If sax is NULL, fallback to the default + * DOM tree building routines. + * + * Returns the resulting document tree + */ +xmlDocPtr +xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, + int size, int recovery) { + return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); +} + +/** + * xmlParseMemory: + * @buffer: an pointer to a char array + * @size: the size of the array + * + * parse an XML in-memory block and build a tree. + * + * Returns the resulting document tree + */ + +xmlDocPtr xmlParseMemory(const char *buffer, int size) { + return(xmlSAXParseMemory(NULL, buffer, size, 0)); +} + +/** + * xmlRecoverMemory: + * @buffer: an pointer to a char array + * @size: the size of the array + * + * parse an XML in-memory block and build a tree. 
+ * In the case the document is not Well Formed, a tree is built anyway + * + * Returns the resulting document tree + */ + +xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { + return(xmlSAXParseMemory(NULL, buffer, size, 1)); +} + +/** + * xmlSAXUserParseMemory: + * @sax: a SAX handler + * @user_data: The user data returned on SAX callbacks + * @buffer: an in-memory XML document input + * @size: the length of the XML document in bytes + * + * A better SAX parsing routine. + * parse an XML in-memory buffer and call the given SAX handler routines. + * + * Returns 0 in case of success or a error number otherwise + */ +int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, + const char *buffer, int size) { + int ret = 0; + xmlParserCtxtPtr ctxt; + xmlSAXHandlerPtr oldsax = NULL; + + if (sax == NULL) return -1; + ctxt = xmlCreateMemoryParserCtxt(buffer, size); + if (ctxt == NULL) return -1; + oldsax = ctxt->sax; + ctxt->sax = sax; + if (user_data != NULL) + ctxt->userData = user_data; + + xmlParseDocument(ctxt); + + if (ctxt->wellFormed) + ret = 0; + else { + if (ctxt->errNo != 0) + ret = ctxt->errNo; + else + ret = -1; + } + ctxt->sax = oldsax; + xmlFreeParserCtxt(ctxt); + + return ret; +} + +/** + * xmlCreateDocParserCtxt: + * @cur: a pointer to an array of xmlChar + * + * Creates a parser context for an XML in-memory document. + * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreateDocParserCtxt(xmlChar *cur) { + int len; + + if (cur == NULL) + return(NULL); + len = xmlStrlen(cur); + return(xmlCreateMemoryParserCtxt((char *)cur, len)); +} + +/** + * xmlSAXParseDoc: + * @sax: the SAX handler block + * @cur: a pointer to an array of xmlChar + * @recovery: work in recovery mode, i.e. tries to read no Well Formed + * documents + * + * parse an XML in-memory document and build a tree. + * It use the given SAX function block to handle the parsing callback. + * If sax is NULL, fallback to the default DOM tree building routines. 
+ * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { + xmlDocPtr ret; + xmlParserCtxtPtr ctxt; + + if (cur == NULL) return(NULL); + + + ctxt = xmlCreateDocParserCtxt(cur); + if (ctxt == NULL) return(NULL); + if (sax != NULL) { + ctxt->sax = sax; + ctxt->userData = NULL; + } + + xmlParseDocument(ctxt); + if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; + else { + ret = NULL; + xmlFreeDoc(ctxt->myDoc); + ctxt->myDoc = NULL; + } + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return(ret); +} + +/** + * xmlParseDoc: + * @cur: a pointer to an array of xmlChar + * + * parse an XML in-memory document and build a tree. + * + * Returns the resulting document tree + */ + +xmlDocPtr +xmlParseDoc(xmlChar *cur) { + return(xmlSAXParseDoc(NULL, cur, 0)); +} + +/************************************************************************ + * * + * Specific function to keep track of entities references * + * and used by the XSLT debugger * + * * + ************************************************************************/ + +static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; + +/** + * xmlAddEntityReference: + * @ent : A valid entity + * @firstNode : A valid first node for children of entity + * @lastNode : A valid last node of children entity + * + * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY + */ +static void +xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, + xmlNodePtr lastNode) +{ + if (xmlEntityRefFunc != NULL) { + (*xmlEntityRefFunc) (ent, firstNode, lastNode); + } +} + + +/** + * xmlSetEntityReferenceFunc: + * @func: A valid function + * + * Set the function to call call back when a xml reference has been made + */ +void +xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) +{ + xmlEntityRefFunc = func; +} + +/************************************************************************ + * * + * Miscellaneous * + * * + 
************************************************************************/ + +#ifdef LIBXML_XPATH_ENABLED +#include <libxml/xpath.h> +#endif + +extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); +static int xmlParserInitialized = 0; + +/** + * xmlInitParser: + * + * Initialization function for the XML parser. + * This is not reentrant. Call once before processing in case of + * use in multithreaded programs. + */ + +void +xmlInitParser(void) { + if (xmlParserInitialized != 0) + return; + + if ((xmlGenericError == xmlGenericErrorDefaultFunc) || + (xmlGenericError == NULL)) + initGenericErrorDefaultFunc(NULL); + xmlInitThreads(); + xmlInitMemory(); + xmlInitCharEncodingHandlers(); + xmlInitializePredefinedEntities(); + xmlDefaultSAXHandlerInit(); + xmlRegisterDefaultInputCallbacks(); + xmlRegisterDefaultOutputCallbacks(); +#ifdef LIBXML_HTML_ENABLED + htmlInitAutoClose(); + htmlDefaultSAXHandlerInit(); +#endif +#ifdef LIBXML_XPATH_ENABLED + xmlXPathInit(); +#endif + xmlParserInitialized = 1; +} + +/** + * xmlCleanupParser: + * + * Cleanup function for the XML parser. It tries to reclaim all + * parsing related global memory allocated for the parser processing. + * It doesn't deallocate any document related memory. Calling this + * function should not prevent reusing the parser. + */ + +void +xmlCleanupParser(void) { + xmlCleanupCharEncodingHandlers(); + xmlCleanupPredefinedEntities(); +#ifdef LIBXML_CATALOG_ENABLED + xmlCatalogCleanup(); +#endif + xmlCleanupThreads(); + xmlParserInitialized = 0; +} diff --git a/bundle/libxml/parserInternals.c b/bundle/libxml/parserInternals.c new file mode 100644 index 0000000000..65551441ee --- /dev/null +++ b/bundle/libxml/parserInternals.c @@ -0,0 +1,3481 @@ +/* + * parserInternals.c : Internal routines (and obsolete ones) needed for the + * XML and HTML parsers. + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#if defined(WIN32) && !defined (__CYGWIN__) +#define XML_DIR_SEP '\\' +#else +#define XML_DIR_SEP '/' +#endif + +#include <string.h> +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/valid.h> +#include <libxml/entities.h> +#include <libxml/xmlerror.h> +#include <libxml/encoding.h> +#include <libxml/valid.h> +#include <libxml/xmlIO.h> +#include <libxml/uri.h> +#include <libxml/SAX.h> +#ifdef LIBXML_CATALOG_ENABLED +#include <libxml/catalog.h> +#endif +#include <libxml/globals.h> + +void xmlUpgradeOldNs(xmlDocPtr doc); + +/* + * Various global defaults for parsing + */ + +/** + * xmlCheckVersion: + * @version: the include version number + * + * check the compiled lib version against the include one. 
+ * This can warn or immediately kill the application + */ +void +xmlCheckVersion(int version) { + int myversion = (int) LIBXML_VERSION; + + xmlInitParser(); + + if ((myversion / 10000) != (version / 10000)) { + xmlGenericError(xmlGenericErrorContext, + "Fatal: program compiled against libxml %d using libxml %d\n", + (version / 10000), (myversion / 10000)); + fprintf(stderr, + "Fatal: program compiled against libxml %d using libxml %d\n", + (version / 10000), (myversion / 10000)); + } + if ((myversion / 100) < (version / 100)) { + xmlGenericError(xmlGenericErrorContext, + "Warning: program compiled against libxml %d using older %d\n", + (version / 100), (myversion / 100)); + } +} + + +static const char *xmlFeaturesList[] = { + "validate", + "load subset", + "keep blanks", + "disable SAX", + "fetch external entities", + "substitute entities", + "gather line info", + "user data", + "is html", + "is standalone", + "stop parser", + "document", + "is well formed", + "is valid", + "SAX block", + "SAX function internalSubset", + "SAX function isStandalone", + "SAX function hasInternalSubset", + "SAX function hasExternalSubset", + "SAX function resolveEntity", + "SAX function getEntity", + "SAX function entityDecl", + "SAX function notationDecl", + "SAX function attributeDecl", + "SAX function elementDecl", + "SAX function unparsedEntityDecl", + "SAX function setDocumentLocator", + "SAX function startDocument", + "SAX function endDocument", + "SAX function startElement", + "SAX function endElement", + "SAX function reference", + "SAX function characters", + "SAX function ignorableWhitespace", + "SAX function processingInstruction", + "SAX function comment", + "SAX function warning", + "SAX function error", + "SAX function fatalError", + "SAX function getParameterEntity", + "SAX function cdataBlock", + "SAX function externalSubset", +}; + +/** + * xmlGetFeaturesList: + * @len: the length of the features name array (input/output) + * @result: an array of string to be filled 
with the features name. + * + * Copy at most *@len feature names into the @result array + * + * Returns -1 in case or error, or the total number of features, + * len is updated with the number of strings copied, + * strings must not be deallocated + */ +int +xmlGetFeaturesList(int *len, const char **result) { + int ret, i; + + ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]); + if ((len == NULL) || (result == NULL)) + return(ret); + if ((*len < 0) || (*len >= 1000)) + return(-1); + if (*len > ret) + *len = ret; + for (i = 0;i < *len;i++) + result[i] = xmlFeaturesList[i]; + return(ret); +} + +/** + * xmlGetFeature: + * @ctxt: an XML/HTML parser context + * @name: the feature name + * @result: location to store the result + * + * Read the current value of one feature of this parser instance + * + * Returns -1 in case or error, 0 otherwise + */ +int +xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) { + if ((ctxt == NULL) || (name == NULL) || (result == NULL)) + return(-1); + + if (!strcmp(name, "validate")) { + *((int *) result) = ctxt->validate; + } else if (!strcmp(name, "keep blanks")) { + *((int *) result) = ctxt->keepBlanks; + } else if (!strcmp(name, "disable SAX")) { + *((int *) result) = ctxt->disableSAX; + } else if (!strcmp(name, "fetch external entities")) { + *((int *) result) = ctxt->loadsubset; + } else if (!strcmp(name, "substitute entities")) { + *((int *) result) = ctxt->replaceEntities; + } else if (!strcmp(name, "gather line info")) { + *((int *) result) = ctxt->record_info; + } else if (!strcmp(name, "user data")) { + *((void **)result) = ctxt->userData; + } else if (!strcmp(name, "is html")) { + *((int *) result) = ctxt->html; + } else if (!strcmp(name, "is standalone")) { + *((int *) result) = ctxt->standalone; + } else if (!strcmp(name, "document")) { + *((xmlDocPtr *) result) = ctxt->myDoc; + } else if (!strcmp(name, "is well formed")) { + *((int *) result) = ctxt->wellFormed; + } else if (!strcmp(name, "is valid")) 
{ + *((int *) result) = ctxt->valid; + } else if (!strcmp(name, "SAX block")) { + *((xmlSAXHandlerPtr *) result) = ctxt->sax; + } else if (!strcmp(name, "SAX function internalSubset")) { + *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset; + } else if (!strcmp(name, "SAX function isStandalone")) { + *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone; + } else if (!strcmp(name, "SAX function hasInternalSubset")) { + *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset; + } else if (!strcmp(name, "SAX function hasExternalSubset")) { + *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset; + } else if (!strcmp(name, "SAX function resolveEntity")) { + *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity; + } else if (!strcmp(name, "SAX function getEntity")) { + *((getEntitySAXFunc *) result) = ctxt->sax->getEntity; + } else if (!strcmp(name, "SAX function entityDecl")) { + *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl; + } else if (!strcmp(name, "SAX function notationDecl")) { + *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl; + } else if (!strcmp(name, "SAX function attributeDecl")) { + *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl; + } else if (!strcmp(name, "SAX function elementDecl")) { + *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl; + } else if (!strcmp(name, "SAX function unparsedEntityDecl")) { + *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl; + } else if (!strcmp(name, "SAX function setDocumentLocator")) { + *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator; + } else if (!strcmp(name, "SAX function startDocument")) { + *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument; + } else if (!strcmp(name, "SAX function endDocument")) { + *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument; + } else if (!strcmp(name, "SAX function startElement")) { + *((startElementSAXFunc *) result) = 
ctxt->sax->startElement; + } else if (!strcmp(name, "SAX function endElement")) { + *((endElementSAXFunc *) result) = ctxt->sax->endElement; + } else if (!strcmp(name, "SAX function reference")) { + *((referenceSAXFunc *) result) = ctxt->sax->reference; + } else if (!strcmp(name, "SAX function characters")) { + *((charactersSAXFunc *) result) = ctxt->sax->characters; + } else if (!strcmp(name, "SAX function ignorableWhitespace")) { + *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace; + } else if (!strcmp(name, "SAX function processingInstruction")) { + *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction; + } else if (!strcmp(name, "SAX function comment")) { + *((commentSAXFunc *) result) = ctxt->sax->comment; + } else if (!strcmp(name, "SAX function warning")) { + *((warningSAXFunc *) result) = ctxt->sax->warning; + } else if (!strcmp(name, "SAX function error")) { + *((errorSAXFunc *) result) = ctxt->sax->error; + } else if (!strcmp(name, "SAX function fatalError")) { + *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError; + } else if (!strcmp(name, "SAX function getParameterEntity")) { + *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity; + } else if (!strcmp(name, "SAX function cdataBlock")) { + *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock; + } else if (!strcmp(name, "SAX function externalSubset")) { + *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset; + } else { + return(-1); + } + return(0); +} + +/** + * xmlSetFeature: + * @ctxt: an XML/HTML parser context + * @name: the feature name + * @value: pointer to the location of the new value + * + * Change the current value of one feature of this parser instance + * + * Returns -1 in case or error, 0 otherwise + */ +int +xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) { + if ((ctxt == NULL) || (name == NULL) || (value == NULL)) + return(-1); + + if (!strcmp(name, "validate")) { + int newvalidate = 
*((int *) value); + if ((!ctxt->validate) && (newvalidate != 0)) { + if (ctxt->vctxt.warning == NULL) + ctxt->vctxt.warning = xmlParserValidityWarning; + if (ctxt->vctxt.error == NULL) + ctxt->vctxt.error = xmlParserValidityError; + ctxt->vctxt.nodeMax = 0; + } + ctxt->validate = newvalidate; + } else if (!strcmp(name, "keep blanks")) { + ctxt->keepBlanks = *((int *) value); + } else if (!strcmp(name, "disable SAX")) { + ctxt->disableSAX = *((int *) value); + } else if (!strcmp(name, "fetch external entities")) { + ctxt->loadsubset = *((int *) value); + } else if (!strcmp(name, "substitute entities")) { + ctxt->replaceEntities = *((int *) value); + } else if (!strcmp(name, "gather line info")) { + ctxt->record_info = *((int *) value); + } else if (!strcmp(name, "user data")) { + ctxt->userData = *((void **)value); + } else if (!strcmp(name, "is html")) { + ctxt->html = *((int *) value); + } else if (!strcmp(name, "is standalone")) { + ctxt->standalone = *((int *) value); + } else if (!strcmp(name, "document")) { + ctxt->myDoc = *((xmlDocPtr *) value); + } else if (!strcmp(name, "is well formed")) { + ctxt->wellFormed = *((int *) value); + } else if (!strcmp(name, "is valid")) { + ctxt->valid = *((int *) value); + } else if (!strcmp(name, "SAX block")) { + ctxt->sax = *((xmlSAXHandlerPtr *) value); + } else if (!strcmp(name, "SAX function internalSubset")) { + ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value); + } else if (!strcmp(name, "SAX function isStandalone")) { + ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value); + } else if (!strcmp(name, "SAX function hasInternalSubset")) { + ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value); + } else if (!strcmp(name, "SAX function hasExternalSubset")) { + ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value); + } else if (!strcmp(name, "SAX function resolveEntity")) { + ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value); + } else if (!strcmp(name, "SAX 
function getEntity")) { + ctxt->sax->getEntity = *((getEntitySAXFunc *) value); + } else if (!strcmp(name, "SAX function entityDecl")) { + ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value); + } else if (!strcmp(name, "SAX function notationDecl")) { + ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value); + } else if (!strcmp(name, "SAX function attributeDecl")) { + ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value); + } else if (!strcmp(name, "SAX function elementDecl")) { + ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value); + } else if (!strcmp(name, "SAX function unparsedEntityDecl")) { + ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value); + } else if (!strcmp(name, "SAX function setDocumentLocator")) { + ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value); + } else if (!strcmp(name, "SAX function startDocument")) { + ctxt->sax->startDocument = *((startDocumentSAXFunc *) value); + } else if (!strcmp(name, "SAX function endDocument")) { + ctxt->sax->endDocument = *((endDocumentSAXFunc *) value); + } else if (!strcmp(name, "SAX function startElement")) { + ctxt->sax->startElement = *((startElementSAXFunc *) value); + } else if (!strcmp(name, "SAX function endElement")) { + ctxt->sax->endElement = *((endElementSAXFunc *) value); + } else if (!strcmp(name, "SAX function reference")) { + ctxt->sax->reference = *((referenceSAXFunc *) value); + } else if (!strcmp(name, "SAX function characters")) { + ctxt->sax->characters = *((charactersSAXFunc *) value); + } else if (!strcmp(name, "SAX function ignorableWhitespace")) { + ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value); + } else if (!strcmp(name, "SAX function processingInstruction")) { + ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value); + } else if (!strcmp(name, "SAX function comment")) { + ctxt->sax->comment = *((commentSAXFunc *) value); + } else if (!strcmp(name, "SAX function warning")) { + 
ctxt->sax->warning = *((warningSAXFunc *) value); + } else if (!strcmp(name, "SAX function error")) { + ctxt->sax->error = *((errorSAXFunc *) value); + } else if (!strcmp(name, "SAX function fatalError")) { + ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value); + } else if (!strcmp(name, "SAX function getParameterEntity")) { + ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value); + } else if (!strcmp(name, "SAX function cdataBlock")) { + ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value); + } else if (!strcmp(name, "SAX function externalSubset")) { + ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value); + } else { + return(-1); + } + return(0); +} + +/************************************************************************ + * * + * Some functions to avoid too large macros * + * * + ************************************************************************/ + +/** + * xmlIsChar: + * @c: an unicode character (int) + * + * Check whether the character is allowed by the production + * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] + * | [#x10000-#x10FFFF] + * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. 
/**
 * xmlIsChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    if ((c == 0x09) || (c == 0x0A) || (c == 0x0D))
        return(1);
    return(((c >= 0x20) && (c <= 0xD7FF)) ||
           ((c >= 0xE000) && (c <= 0xFFFD)) ||
           ((c >= 0x10000) && (c <= 0x10FFFF)));
}

/**
 * xmlIsBlank:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    return((c == 0x20) || (c == 0x09) || (c == 0xA) || (c == 0x0D));
}

/* BaseChar membership for the code points 0x00 - 0xFF, indexed by value */
static int xmlBaseArray[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/*
 * BaseChar ranges for the code points >= 0x0100, as inclusive
 * [first, last] pairs in strictly ascending order (the binary search in
 * xmlIsBaseChar() relies on that order).
 */
static const unsigned short xmlBaseCharRanges[][2] = {
    {0x0100,0x0131}, {0x0134,0x013E}, {0x0141,0x0148}, {0x014A,0x017E},
    {0x0180,0x01C3}, {0x01CD,0x01F0}, {0x01F4,0x01F5}, {0x01FA,0x0217},
    {0x0250,0x02A8}, {0x02BB,0x02C1}, {0x0386,0x0386}, {0x0388,0x038A},
    {0x038C,0x038C}, {0x038E,0x03A1}, {0x03A3,0x03CE}, {0x03D0,0x03D6},
    {0x03DA,0x03DA}, {0x03DC,0x03DC}, {0x03DE,0x03DE}, {0x03E0,0x03E0},
    {0x03E2,0x03F3}, {0x0401,0x040C}, {0x040E,0x044F}, {0x0451,0x045C},
    {0x045E,0x0481}, {0x0490,0x04C4}, {0x04C7,0x04C8}, {0x04CB,0x04CC},
    {0x04D0,0x04EB}, {0x04EE,0x04F5}, {0x04F8,0x04F9}, {0x0531,0x0556},
    {0x0559,0x0559}, {0x0561,0x0586}, {0x05D0,0x05EA}, {0x05F0,0x05F2},
    {0x0621,0x063A}, {0x0641,0x064A}, {0x0671,0x06B7}, {0x06BA,0x06BE},
    {0x06C0,0x06CE}, {0x06D0,0x06D3}, {0x06D5,0x06D5}, {0x06E5,0x06E6},
    {0x0905,0x0939}, {0x093D,0x093D}, {0x0958,0x0961}, {0x0985,0x098C},
    {0x098F,0x0990}, {0x0993,0x09A8}, {0x09AA,0x09B0}, {0x09B2,0x09B2},
    {0x09B6,0x09B9}, {0x09DC,0x09DD}, {0x09DF,0x09E1}, {0x09F0,0x09F1},
    {0x0A05,0x0A0A}, {0x0A0F,0x0A10}, {0x0A13,0x0A28}, {0x0A2A,0x0A30},
    {0x0A32,0x0A33}, {0x0A35,0x0A36}, {0x0A38,0x0A39}, {0x0A59,0x0A5C},
    {0x0A5E,0x0A5E}, {0x0A72,0x0A74}, {0x0A85,0x0A8B}, {0x0A8D,0x0A8D},
    {0x0A8F,0x0A91}, {0x0A93,0x0AA8}, {0x0AAA,0x0AB0}, {0x0AB2,0x0AB3},
    {0x0AB5,0x0AB9}, {0x0ABD,0x0ABD}, {0x0AE0,0x0AE0}, {0x0B05,0x0B0C},
    {0x0B0F,0x0B10}, {0x0B13,0x0B28}, {0x0B2A,0x0B30}, {0x0B32,0x0B33},
    {0x0B36,0x0B39}, {0x0B3D,0x0B3D}, {0x0B5C,0x0B5D}, {0x0B5F,0x0B61},
    {0x0B85,0x0B8A}, {0x0B8E,0x0B90}, {0x0B92,0x0B95}, {0x0B99,0x0B9A},
    {0x0B9C,0x0B9C}, {0x0B9E,0x0B9F}, {0x0BA3,0x0BA4}, {0x0BA8,0x0BAA},
    {0x0BAE,0x0BB5}, {0x0BB7,0x0BB9}, {0x0C05,0x0C0C}, {0x0C0E,0x0C10},
    {0x0C12,0x0C28}, {0x0C2A,0x0C33}, {0x0C35,0x0C39}, {0x0C60,0x0C61},
    {0x0C85,0x0C8C}, {0x0C8E,0x0C90}, {0x0C92,0x0CA8}, {0x0CAA,0x0CB3},
    {0x0CB5,0x0CB9}, {0x0CDE,0x0CDE}, {0x0CE0,0x0CE1}, {0x0D05,0x0D0C},
    {0x0D0E,0x0D10}, {0x0D12,0x0D28}, {0x0D2A,0x0D39}, {0x0D60,0x0D61},
    {0x0E01,0x0E2E}, {0x0E30,0x0E30}, {0x0E32,0x0E33}, {0x0E40,0x0E45},
    {0x0E81,0x0E82}, {0x0E84,0x0E84}, {0x0E87,0x0E88}, {0x0E8A,0x0E8A},
    {0x0E8D,0x0E8D}, {0x0E94,0x0E97}, {0x0E99,0x0E9F}, {0x0EA1,0x0EA3},
    {0x0EA5,0x0EA5}, {0x0EA7,0x0EA7}, {0x0EAA,0x0EAB}, {0x0EAD,0x0EAE},
    {0x0EB0,0x0EB0}, {0x0EB2,0x0EB3}, {0x0EBD,0x0EBD}, {0x0EC0,0x0EC4},
    {0x0F40,0x0F47}, {0x0F49,0x0F69}, {0x10A0,0x10C5}, {0x10D0,0x10F6},
    {0x1100,0x1100}, {0x1102,0x1103}, {0x1105,0x1107}, {0x1109,0x1109},
    {0x110B,0x110C}, {0x110E,0x1112}, {0x113C,0x113C}, {0x113E,0x113E},
    {0x1140,0x1140}, {0x114C,0x114C}, {0x114E,0x114E}, {0x1150,0x1150},
    {0x1154,0x1155}, {0x1159,0x1159}, {0x115F,0x1161}, {0x1163,0x1163},
    {0x1165,0x1165}, {0x1167,0x1167}, {0x1169,0x1169}, {0x116D,0x116E},
    {0x1172,0x1173}, {0x1175,0x1175}, {0x119E,0x119E}, {0x11A8,0x11A8},
    {0x11AB,0x11AB}, {0x11AE,0x11AF}, {0x11B7,0x11B8}, {0x11BA,0x11BA},
    {0x11BC,0x11C2}, {0x11EB,0x11EB}, {0x11F0,0x11F0}, {0x11F9,0x11F9},
    {0x1E00,0x1E9B}, {0x1EA0,0x1EF9}, {0x1F00,0x1F15}, {0x1F18,0x1F1D},
    {0x1F20,0x1F45}, {0x1F48,0x1F4D}, {0x1F50,0x1F57}, {0x1F59,0x1F59},
    {0x1F5B,0x1F5B}, {0x1F5D,0x1F5D}, {0x1F5F,0x1F7D}, {0x1F80,0x1FB4},
    {0x1FB6,0x1FBC}, {0x1FBE,0x1FBE}, {0x1FC2,0x1FC4}, {0x1FC6,0x1FCC},
    {0x1FD0,0x1FD3}, {0x1FD6,0x1FDB}, {0x1FE0,0x1FEC}, {0x1FF2,0x1FF4},
    {0x1FF6,0x1FFC}, {0x2126,0x2126}, {0x212A,0x212B}, {0x212E,0x212E},
    {0x2180,0x2182}, {0x3041,0x3094}, {0x30A1,0x30FA}, {0x3105,0x312C},
    {0xAC00,0xD7A3}
};

/**
 * xmlIsBaseChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from xmlBaseArray, the others
 * by a binary search in xmlBaseCharRanges. Negative values are not
 * characters and are rejected before any table is indexed.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    int low, high;

    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(xmlBaseCharRanges) /
                  sizeof(xmlBaseCharRanges[0])) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < (int) xmlBaseCharRanges[mid][0])
            high = mid - 1;
        else if (c > (int) xmlBaseCharRanges[mid][1])
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
+ * + * Returns 0 if not, non-zero otherwise + */ +int +xmlIsDigit(int c) { + return( + (((c) >= 0x0030) && ((c) <= 0x0039)) || + (((c) >= 0x660) && ( /* accelerator */ + (((c) >= 0x0660) && ((c) <= 0x0669)) || + (((c) >= 0x06F0) && ((c) <= 0x06F9)) || + (((c) >= 0x0966) && ((c) <= 0x096F)) || + (((c) >= 0x09E6) && ((c) <= 0x09EF)) || + (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || + (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || + (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || + (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || + (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || + (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || + (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || + (((c) >= 0x0E50) && ((c) <= 0x0E59)) || + (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || + (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ )); +} + +/** + * xmlIsCombining: + * @c: an unicode character (int) + * + * Check whether the character is allowed by the production + * [87] CombiningChar ::= ... long list see REC ... + * + * Returns 0 if not, non-zero otherwise + */ +int +xmlIsCombining(int c) { + return( + (((c) >= 0x300) && ( /* accelerator */ + (((c) >= 0x0300) && ((c) <= 0x0345)) || + (((c) >= 0x0360) && ((c) <= 0x0361)) || + (((c) >= 0x0483) && ((c) <= 0x0486)) || + (((c) >= 0x0591) && ((c) <= 0x05A1)) || + (((c) >= 0x05A3) && ((c) <= 0x05B9)) || + (((c) >= 0x05BB) && ((c) <= 0x05BD)) || + ((c) == 0x05BF) || + (((c) >= 0x05C1) && ((c) <= 0x05C2)) || + ((c) == 0x05C4) || + (((c) >= 0x064B) && ((c) <= 0x0652)) || + ((c) == 0x0670) || + (((c) >= 0x06D6) && ((c) <= 0x06DC)) || + (((c) >= 0x06DD) && ((c) <= 0x06DF)) || + (((c) >= 0x06E0) && ((c) <= 0x06E4)) || + (((c) >= 0x06E7) && ((c) <= 0x06E8)) || + (((c) >= 0x06EA) && ((c) <= 0x06ED)) || + (((c) >= 0x0901) && ( /* accelerator */ + (((c) >= 0x0901) && ((c) <= 0x0903)) || + ((c) == 0x093C) || + (((c) >= 0x093E) && ((c) <= 0x094C)) || + ((c) == 0x094D) || + (((c) >= 0x0951) && ((c) <= 0x0954)) || + (((c) >= 0x0962) && ((c) <= 0x0963)) || + (((c) >= 0x0981) && ((c) <= 0x0983)) || + 
((c) == 0x09BC) || + ((c) == 0x09BE) || + ((c) == 0x09BF) || + (((c) >= 0x09C0) && ((c) <= 0x09C4)) || + (((c) >= 0x09C7) && ((c) <= 0x09C8)) || + (((c) >= 0x09CB) && ((c) <= 0x09CD)) || + ((c) == 0x09D7) || + (((c) >= 0x09E2) && ((c) <= 0x09E3)) || + (((c) >= 0x0A02) && ( /* accelerator */ + ((c) == 0x0A02) || + ((c) == 0x0A3C) || + ((c) == 0x0A3E) || + ((c) == 0x0A3F) || + (((c) >= 0x0A40) && ((c) <= 0x0A42)) || + (((c) >= 0x0A47) && ((c) <= 0x0A48)) || + (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || + (((c) >= 0x0A70) && ((c) <= 0x0A71)) || + (((c) >= 0x0A81) && ((c) <= 0x0A83)) || + ((c) == 0x0ABC) || + (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || + (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || + (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || + (((c) >= 0x0B01) && ((c) <= 0x0B03)) || + ((c) == 0x0B3C) || + (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || + (((c) >= 0x0B47) && ((c) <= 0x0B48)) || + (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || + (((c) >= 0x0B56) && ((c) <= 0x0B57)) || + (((c) >= 0x0B82) && ((c) <= 0x0B83)) || + (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || + (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || + (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || + ((c) == 0x0BD7) || + (((c) >= 0x0C01) && ((c) <= 0x0C03)) || + (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || + (((c) >= 0x0C46) && ((c) <= 0x0C48)) || + (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || + (((c) >= 0x0C55) && ((c) <= 0x0C56)) || + (((c) >= 0x0C82) && ((c) <= 0x0C83)) || + (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || + (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || + (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || + (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || + (((c) >= 0x0D02) && ((c) <= 0x0D03)) || + (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || + (((c) >= 0x0D46) && ((c) <= 0x0D48)) || + (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || + ((c) == 0x0D57) || + (((c) >= 0x0E31) && ( /* accelerator */ + ((c) == 0x0E31) || + (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || + (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || + ((c) == 0x0EB1) || + (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || + (((c) >= 0x0EBB) && ((c) 
<= 0x0EBC)) || + (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || + (((c) >= 0x0F18) && ((c) <= 0x0F19)) || + ((c) == 0x0F35) || + ((c) == 0x0F37) || + ((c) == 0x0F39) || + ((c) == 0x0F3E) || + ((c) == 0x0F3F) || + (((c) >= 0x0F71) && ((c) <= 0x0F84)) || + (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || + (((c) >= 0x0F90) && ((c) <= 0x0F95)) || + ((c) == 0x0F97) || + (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || + (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || + ((c) == 0x0FB9) || + (((c) >= 0x20D0) && ((c) <= 0x20DC)) || + ((c) == 0x20E1) || + (((c) >= 0x302A) && ((c) <= 0x302F)) || + ((c) == 0x3099) || + ((c) == 0x309A)))))))))); +} + +/** + * xmlIsExtender: + * @c: an unicode character (int) + * + * Check whether the character is allowed by the production + * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | + * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | + * [#x309D-#x309E] | [#x30FC-#x30FE] + * + * Returns 0 if not, non-zero otherwise + */ +int +xmlIsExtender(int c) { + switch (c) { + case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: + case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005: + case 0x3031: case 0x3032: case 0x3033: case 0x3034: + case 0x3035: case 0x309D: case 0x309E: case 0x30FC: + case 0x30FD: case 0x30FE: + return 1; + default: + return 0; + } +} + +/** + * xmlIsIdeographic: + * @c: an unicode character (int) + * + * Check whether the character is allowed by the production + * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] + * + * Returns 0 if not, non-zero otherwise + */ +int +xmlIsIdeographic(int c) { + return(((c) < 0x0100) ? 
0 : + (((c) >= 0x4e00) && ((c) <= 0x9fa5)) || + (((c) >= 0xf900) && ((c) <= 0xfa2d)) || + (((c) >= 0x3021) && ((c) <= 0x3029)) || + ((c) == 0x3007)); +} + +/** + * xmlIsLetter: + * @c: an unicode character (int) + * + * Check whether the character is allowed by the production + * [84] Letter ::= BaseChar | Ideographic + * + * Returns 0 if not, non-zero otherwise + */ +int +xmlIsLetter(int c) { + return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); +} + +/** + * xmlIsPubidChar: + * @c: an unicode character (int) + * + * Check whether the character is allowed by the production + * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + * + * Returns 0 if not, non-zero otherwise + */ +int +xmlIsPubidChar(int c) { + return( + ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || + (((c) >= 'a') && ((c) <= 'z')) || + (((c) >= 'A') && ((c) <= 'Z')) || + (((c) >= '0') && ((c) <= '9')) || + ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || + ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || + ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || + ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || + ((c) == '$') || ((c) == '_') || ((c) == '%')); +} + +/************************************************************************ + * * + * Input handling functions for progressive parsing * + * * + ************************************************************************/ + +/* #define DEBUG_INPUT */ +/* #define DEBUG_STACK */ +/* #define DEBUG_PUSH */ + + +/* we need to keep enough input to show errors in context */ +#define LINE_LEN 80 + +#ifdef DEBUG_INPUT +#define CHECK_BUFFER(in) check_buffer(in) + +static +void check_buffer(xmlParserInputPtr in) { + if (in->base != in->buf->buffer->content) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInput: base mismatch problem\n"); + } + if (in->cur < in->base) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInput: cur < base problem\n"); + } + if (in->cur > 
in->base + in->buf->buffer->use) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInput: cur > base + use problem\n"); + } + xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", + (int) in, (int) in->buf->buffer->content, in->cur - in->base, + in->buf->buffer->use, in->buf->buffer->size); +} + +#else +#define CHECK_BUFFER(in) +#endif + + +/** + * xmlParserInputRead: + * @in: an XML parser input + * @len: an indicative size for the lookahead + * + * This function refresh the input for the parser. It doesn't try to + * preserve pointers to the input buffer, and discard already read data + * + * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the + * end of this entity + */ +int +xmlParserInputRead(xmlParserInputPtr in, int len) { + int ret; + int used; + int indx; + +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, "Read\n"); +#endif + if (in->buf == NULL) return(-1); + if (in->base == NULL) return(-1); + if (in->cur == NULL) return(-1); + if (in->buf->buffer == NULL) return(-1); + if (in->buf->readcallback == NULL) return(-1); + + CHECK_BUFFER(in); + + used = in->cur - in->buf->buffer->content; + ret = xmlBufferShrink(in->buf->buffer, used); + if (ret > 0) { + in->cur -= ret; + in->consumed += ret; + } + ret = xmlParserInputBufferRead(in->buf, len); + if (in->base != in->buf->buffer->content) { + /* + * the buffer has been reallocated + */ + indx = in->cur - in->base; + in->base = in->buf->buffer->content; + in->cur = &in->buf->buffer->content[indx]; + } + in->end = &in->buf->buffer->content[in->buf->buffer->use]; + + CHECK_BUFFER(in); + + return(ret); +} + +/** + * xmlParserInputGrow: + * @in: an XML parser input + * @len: an indicative size for the lookahead + * + * This function increase the input for the parser. 
It tries to + * preserve pointers to the input buffer, and keep already read data + * + * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the + * end of this entity + */ +int +xmlParserInputGrow(xmlParserInputPtr in, int len) { + int ret; + int indx; + +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, "Grow\n"); +#endif + if (in->buf == NULL) return(-1); + if (in->base == NULL) return(-1); + if (in->cur == NULL) return(-1); + if (in->buf->buffer == NULL) return(-1); + + CHECK_BUFFER(in); + + indx = in->cur - in->base; + if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { + + CHECK_BUFFER(in); + + return(0); + } + if (in->buf->readcallback != NULL) + ret = xmlParserInputBufferGrow(in->buf, len); + else + return(0); + + /* + * NOTE : in->base may be a "dangling" i.e. freed pointer in this + * block, but we use it really as an integer to do some + * pointer arithmetic. Insure will raise it as a bug but in + * that specific case, that's not ! + */ + if (in->base != in->buf->buffer->content) { + /* + * the buffer has been reallocated + */ + indx = in->cur - in->base; + in->base = in->buf->buffer->content; + in->cur = &in->buf->buffer->content[indx]; + } + in->end = &in->buf->buffer->content[in->buf->buffer->use]; + + CHECK_BUFFER(in); + + return(ret); +} + +/** + * xmlParserInputShrink: + * @in: an XML parser input + * + * This function removes used input for the parser. 
+ */ +void +xmlParserInputShrink(xmlParserInputPtr in) { + int used; + int ret; + int indx; + +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, "Shrink\n"); +#endif + if (in->buf == NULL) return; + if (in->base == NULL) return; + if (in->cur == NULL) return; + if (in->buf->buffer == NULL) return; + + CHECK_BUFFER(in); + + used = in->cur - in->buf->buffer->content; + /* + * Do not shrink on large buffers whose only a tiny fraction + * was consumed + */ + if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK) + return; + if (used > INPUT_CHUNK) { + ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); + if (ret > 0) { + in->cur -= ret; + in->consumed += ret; + } + in->end = &in->buf->buffer->content[in->buf->buffer->use]; + } + + CHECK_BUFFER(in); + + if (in->buf->buffer->use > INPUT_CHUNK) { + return; + } + xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); + if (in->base != in->buf->buffer->content) { + /* + * the buffer has been reallocated + */ + indx = in->cur - in->base; + in->base = in->buf->buffer->content; + in->cur = &in->buf->buffer->content[indx]; + } + in->end = &in->buf->buffer->content[in->buf->buffer->use]; + + CHECK_BUFFER(in); +} + +/************************************************************************ + * * + * UTF8 character input and related functions * + * * + ************************************************************************/ + +/** + * xmlNextChar: + * @ctxt: the XML parser context + * + * Skip to the next char input char. + */ + +void +xmlNextChar(xmlParserCtxtPtr ctxt) { + if (ctxt->instate == XML_PARSER_EOF) + return; + + /* + * 2.11 End-of-Line Handling + * the literal two-character sequence "#xD#xA" or a standalone + * literal #xD, an XML processor must pass to the application + * the single character #xA. 
+ */ + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && + (ctxt->instate != XML_PARSER_COMMENT)) { + /* + * If we are at the end of the current entity and + * the context allows it, we pop consumed entities + * automatically. + * the auto closing should be blocked in other cases + */ + xmlPopInput(ctxt); + } else { + if (*(ctxt->input->cur) == '\n') { + ctxt->input->line++; ctxt->input->col = 1; + } else ctxt->input->col++; + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + const unsigned char *cur = ctxt->input->cur; + unsigned char c; + + c = *cur; + if (c & 0x80) { + if (cur[1] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + unsigned int val; + + if (cur[2] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (cur[3] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + ctxt->input->cur += 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + ctxt->input->cur += 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + if (((val > 0xd7ff) && (val < 0xe000)) || + ((val > 0xfffd) && (val < 0x10000)) || + (val >= 0x110000)) { + if ((ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Char 
0x%X out of allowed range\n", val); + ctxt->errNo = XML_ERR_INVALID_ENCODING; + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } else + /* 2-byte code */ + ctxt->input->cur += 2; + } else + /* 1-byte code */ + ctxt->input->cur++; + } else { + /* + * Assume it's a fixed length encoding (1) with + * a compatible encoding for the ASCII set, since + * XML constructs only use < 128 chars + */ + ctxt->input->cur++; + } + ctxt->nbChars++; + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + } + } else { + ctxt->input->cur++; + ctxt->nbChars++; + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + } + if ((*ctxt->input->cur == '%') && (!ctxt->html)) + xmlParserHandlePEReference(ctxt); + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) + xmlPopInput(ctxt); + return; +encoding_error: + /* + * If we detect an UTF8 error that probably mean that the + * input encoding didn't get properly advertised in the + * declaration header. Report the error and switch the encoding + * to ISO-Latin-1 (if you don't like this policy, just declare the + * encoding !) + */ + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { + ctxt->sax->error(ctxt->userData, + "Input is not proper UTF-8, indicate encoding !\n"); + ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); + } + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_INVALID_ENCODING; + + ctxt->charset = XML_CHAR_ENCODING_8859_1; + ctxt->input->cur++; + return; +} + +/** + * xmlCurrentChar: + * @ctxt: the XML parser context + * @len: pointer to the length of the char read + * + * The current char value, if using UTF-8 this may actually span multiple + * bytes in the input buffer. 
Implement the end of line normalization: + * 2.11 End-of-Line Handling + * Wherever an external parsed entity or the literal entity value + * of an internal parsed entity contains either the literal two-character + * sequence "#xD#xA" or a standalone literal #xD, an XML processor + * must pass to the application the single character #xA. + * This behavior can conveniently be produced by normalizing all + * line breaks to #xA on input, before parsing.) + * + * Returns the current char value and its length + */ + +int +xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { + if (ctxt->instate == XML_PARSER_EOF) + return(0); + + if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { + *len = 1; + return((int) *ctxt->input->cur); + } + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + const unsigned char *cur = ctxt->input->cur; + unsigned char c; + unsigned int val; + + c = *cur; + if (c & 0x80) { + if (cur[1] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + + if (cur[2] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (cur[3] == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } else { 
+ /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + if (!IS_CHAR(val)) { + if ((ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Char 0x%X out of allowed range\n", val); + ctxt->errNo = XML_ERR_INVALID_ENCODING; + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + return(val); + } else { + /* 1-byte code */ + *len = 1; + if (*ctxt->input->cur == 0xD) { + if (ctxt->input->cur[1] == 0xA) { + ctxt->nbChars++; + ctxt->input->cur++; + } + return(0xA); + } + return((int) *ctxt->input->cur); + } + } + /* + * Assume it's a fixed length encoding (1) with + * a compatible encoding for the ASCII set, since + * XML constructs only use < 128 chars + */ + *len = 1; + if (*ctxt->input->cur == 0xD) { + if (ctxt->input->cur[1] == 0xA) { + ctxt->nbChars++; + ctxt->input->cur++; + } + return(0xA); + } + return((int) *ctxt->input->cur); +encoding_error: + /* + * An encoding problem may arise from a truncated input buffer + * splitting a character in the middle. In that case do not raise + * an error but return 0 to endicate an end of stream problem + */ + if (ctxt->input->end - ctxt->input->cur < 4) { + *len = 0; + return(0); + } + + /* + * If we detect an UTF8 error that probably mean that the + * input encoding didn't get properly advertised in the + * declaration header. Report the error and switch the encoding + * to ISO-Latin-1 (if you don't like this policy, just declare the + * encoding !) 
+ */ + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { + ctxt->sax->error(ctxt->userData, + "Input is not proper UTF-8, indicate encoding !\n"); + ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); + } + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_INVALID_ENCODING; + + ctxt->charset = XML_CHAR_ENCODING_8859_1; + *len = 1; + return((int) *ctxt->input->cur); +} + +/** + * xmlStringCurrentChar: + * @ctxt: the XML parser context + * @cur: pointer to the beginning of the char + * @len: pointer to the length of the char read + * + * The current char value, if using UTF-8 this may actually span multiple + * bytes in the input buffer. + * + * Returns the current char value and its length + */ + +int +xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) +{ + if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + unsigned char c; + unsigned int val; + + c = *cur; + if (c & 0x80) { + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } else { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + if (!IS_CHAR(val)) { + if ((ctxt != NULL) && (ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Char 0x%X out of allowed range\n", + val); + if (ctxt != NULL) { + ctxt->errNo = XML_ERR_INVALID_ENCODING; + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } + } + return (val); + } else { + /* 1-byte code */ + *len = 1; + return ((int) *cur); + } + } + /* + * Assume it's a fixed length encoding (1) with + * a compatible encoding for the ASCII set, since + * XML constructs only use < 128 chars + */ + *len = 1; + return ((int) *cur); +encoding_error: + + /* + * If we detect an UTF8 error that probably mean that the + * input encoding didn't get properly advertised in the + * declaration header. Report the error and switch the encoding + * to ISO-Latin-1 (if you don't like this policy, just declare the + * encoding !) 
+ */ + if (ctxt != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { + ctxt->sax->error(ctxt->userData, + "Input is not proper UTF-8, indicate encoding !\n"); + ctxt->sax->error(ctxt->userData, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); + } + ctxt->errNo = XML_ERR_INVALID_ENCODING; + ctxt->wellFormed = 0; + } + + *len = 1; + return ((int) *cur); +} + +/** + * xmlCopyCharMultiByte: + * @out: pointer to an array of xmlChar + * @val: the char value + * + * append the char value in the array + * + * Returns the number of xmlChar written + */ +int +xmlCopyCharMultiByte(xmlChar *out, int val) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + */ + if (val >= 0x80) { + xmlChar *savedout = out; + int bits; + if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } + else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} + else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } + else { + xmlGenericError(xmlGenericErrorContext, + "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", + val); + return(0); + } + for ( ; bits >= 0; bits-= 6) + *out++= ((val >> bits) & 0x3F) | 0x80 ; + return (out - savedout); + } + *out = (xmlChar) val; + return 1; +} + +/** + * xmlCopyChar: + * @len: Ignored, compatibility + * @out: pointer to an array of xmlChar + * @val: the char value + * + * append the char value in the array + * + * Returns the number of xmlChar written + */ + +int +xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { + /* the len parameter is ignored */ + if (val >= 0x80) { + return(xmlCopyCharMultiByte (out, val)); + } + *out = (xmlChar) val; + return 1; +} + 
+/************************************************************************ + * * + * Commodity functions to switch encodings * + * * + ************************************************************************/ + +/** + * xmlSwitchEncoding: + * @ctxt: the parser context + * @enc: the encoding value (number) + * + * change the input functions when discovering the character encoding + * of a given entity. + * + * Returns 0 in case of success, -1 otherwise + */ +int +xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) +{ + xmlCharEncodingHandlerPtr handler; + + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "encoding unknown\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + break; + case XML_CHAR_ENCODING_NONE: + /* let's assume it's UTF-8 without the XML decl */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + case XML_CHAR_ENCODING_UTF8: + /* default encoding, no conversion should be needed */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + + /* + * Errata on XML-1.0 June 20 2001 + * Specific handling of the Byte Order Mark for + * UTF-8 + */ + if ((ctxt->input != NULL) && + (ctxt->input->cur[0] == 0xEF) && + (ctxt->input->cur[1] == 0xBB) && + (ctxt->input->cur[2] == 0xBF)) { + ctxt->input->cur += 3; + } + return(0); + default: + break; + } + handler = xmlGetCharEncodingHandler(enc); + if (handler == NULL) { + /* + * Default handlers. 
+ */ + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "encoding unknown\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->charset = XML_CHAR_ENCODING_UTF8; + break; + case XML_CHAR_ENCODING_NONE: + /* let's assume it's UTF-8 without the XML decl */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + case XML_CHAR_ENCODING_UTF8: + case XML_CHAR_ENCODING_ASCII: + /* default encoding, no conversion should be needed */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + case XML_CHAR_ENCODING_UTF16LE: + break; + case XML_CHAR_ENCODING_UTF16BE: + break; + case XML_CHAR_ENCODING_UCS4LE: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding USC4 little endian not supported\n"); + break; + case XML_CHAR_ENCODING_UCS4BE: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding USC4 big endian not supported\n"); + break; + case XML_CHAR_ENCODING_EBCDIC: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding EBCDIC not supported\n"); + break; + case XML_CHAR_ENCODING_UCS4_2143: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding UCS4 2143 not supported\n"); + break; + case XML_CHAR_ENCODING_UCS4_3412: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding UCS4 3412 not supported\n"); + break; + case XML_CHAR_ENCODING_UCS2: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding UCS2 not supported\n"); + break; + case XML_CHAR_ENCODING_8859_1: + case XML_CHAR_ENCODING_8859_2: + case XML_CHAR_ENCODING_8859_3: + case XML_CHAR_ENCODING_8859_4: + case XML_CHAR_ENCODING_8859_5: + case XML_CHAR_ENCODING_8859_6: + case XML_CHAR_ENCODING_8859_7: + case XML_CHAR_ENCODING_8859_8: + case XML_CHAR_ENCODING_8859_9: + /* + * We used to keep the internal content in the + * document encoding however this turns being unmaintainable + * So xmlGetCharEncodingHandler() will return non-null + * values for this now. + */ + if ((ctxt->inputNr == 1) && + (ctxt->encoding == NULL) && + (ctxt->input->encoding != NULL)) { + ctxt->encoding = xmlStrdup(ctxt->input->encoding); + } + ctxt->charset = enc; + return(0); + case XML_CHAR_ENCODING_2022_JP: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding ISO-2022-JPnot supported\n"); + break; + case XML_CHAR_ENCODING_SHIFT_JIS: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding Shift_JIS not supported\n"); + break; + case XML_CHAR_ENCODING_EUC_JP: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding EUC-JPnot supported\n"); + break; + } + } + if (handler == NULL) + return(-1); + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(xmlSwitchToEncoding(ctxt, handler)); +} + +/** + * xmlSwitchToEncoding: + * @ctxt: the parser context + * @handler: the encoding handler + * + * change the input functions when discovering the character encoding + * of a given entity. 
+ * + * Returns 0 in case of success, -1 otherwise + */ +int +xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) +{ + int nbchars; + + if (handler != NULL) { + if (ctxt->input != NULL) { + if (ctxt->input->buf != NULL) { + if (ctxt->input->buf->encoder != NULL) { + /* + * Check in case the auto encoding detetection triggered + * in already. + */ + if (ctxt->input->buf->encoder == handler) + return(0); + + /* + * "UTF-16" can be used for both LE and BE + */ + if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UTF-16", 6)) && + (!xmlStrncmp(BAD_CAST handler->name, + BAD_CAST "UTF-16", 6))) { + return(0); + } + + /* + * Note: this is a bit dangerous, but that's what it + * takes to use nearly compatible signature for different + * encodings. + */ + xmlCharEncCloseFunc(ctxt->input->buf->encoder); + ctxt->input->buf->encoder = handler; + return(0); + } + ctxt->input->buf->encoder = handler; + + /* + * Is there already some content down the pipe to convert ? + */ + if ((ctxt->input->buf->buffer != NULL) && + (ctxt->input->buf->buffer->use > 0)) { + int processed; + + /* + * Specific handling of the Byte Order Mark for + * UTF-16 + */ + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-16LE")) && + (ctxt->input->cur[0] == 0xFF) && + (ctxt->input->cur[1] == 0xFE)) { + ctxt->input->cur += 2; + } + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-16BE")) && + (ctxt->input->cur[0] == 0xFE) && + (ctxt->input->cur[1] == 0xFF)) { + ctxt->input->cur += 2; + } + /* + * Errata on XML-1.0 June 20 2001 + * Specific handling of the Byte Order Mark for + * UTF-8 + */ + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-8")) && + (ctxt->input->cur[0] == 0xEF) && + (ctxt->input->cur[1] == 0xBB) && + (ctxt->input->cur[2] == 0xBF)) { + ctxt->input->cur += 3; + } + + /* + * Shrink the current input buffer. 
+ * Move it as the raw buffer and create a new input buffer + */ + processed = ctxt->input->cur - ctxt->input->base; + xmlBufferShrink(ctxt->input->buf->buffer, processed); + ctxt->input->buf->raw = ctxt->input->buf->buffer; + ctxt->input->buf->buffer = xmlBufferCreate(); + + if (ctxt->html) { + /* + * convert as much as possible of the buffer + */ + nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + } else { + /* + * convert just enough to get + * '<?xml version="1.0" encoding="xxx"?>' + * parsed with the autodetected encoding + * into the parser reading buffer. + */ + nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + } + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlSwitchToEncoding: encoder error\n"); + return(-1); + } + ctxt->input->base = + ctxt->input->cur = ctxt->input->buf->buffer->content; + ctxt->input->end = + &ctxt->input->base[ctxt->input->buf->buffer->use]; + + } + return(0); + } else { + if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) { + /* + * When parsing a static memory array one must know the + * size to be able to convert the buffer. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSwitchToEncoding : no input\n"); + return(-1); + } else { + int processed; + + /* + * Shrink the current input buffer. + * Move it as the raw buffer and create a new input buffer + */ + processed = ctxt->input->cur - ctxt->input->base; + + ctxt->input->buf->raw = xmlBufferCreate(); + xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur, + ctxt->input->length - processed); + ctxt->input->buf->buffer = xmlBufferCreate(); + + /* + * convert as much as possible of the raw input + * to the parser reading buffer. 
+ */ + nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlSwitchToEncoding: encoder error\n"); + return(-1); + } + + /* + * Conversion succeeded, get rid of the old buffer + */ + if ((ctxt->input->free != NULL) && + (ctxt->input->base != NULL)) + ctxt->input->free((xmlChar *) ctxt->input->base); + ctxt->input->base = + ctxt->input->cur = ctxt->input->buf->buffer->content; + ctxt->input->end = + &ctxt->input->base[ctxt->input->buf->buffer->use]; + } + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSwitchToEncoding : no input\n"); + return(-1); + } + /* + * The parsing is now done in UTF8 natively + */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + } else + return(-1); + return(0); + +} + +/************************************************************************ + * * + * Commodity functions to handle entities processing * + * * + ************************************************************************/ + +/** + * xmlFreeInputStream: + * @input: an xmlParserInputPtr + * + * Free up an input stream. 
+ */ +void +xmlFreeInputStream(xmlParserInputPtr input) { + if (input == NULL) return; + + if (input->filename != NULL) xmlFree((char *) input->filename); + if (input->directory != NULL) xmlFree((char *) input->directory); + if (input->encoding != NULL) xmlFree((char *) input->encoding); + if (input->version != NULL) xmlFree((char *) input->version); + if ((input->free != NULL) && (input->base != NULL)) + input->free((xmlChar *) input->base); + if (input->buf != NULL) + xmlFreeParserInputBuffer(input->buf); + xmlFree(input); +} + +/** + * xmlNewInputStream: + * @ctxt: an XML parser context + * + * Create a new input stream structure + * Returns the new input stream or NULL + */ +xmlParserInputPtr +xmlNewInputStream(xmlParserCtxtPtr ctxt) { + xmlParserInputPtr input; + + input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); + if (input == NULL) { + if (ctxt != NULL) { + ctxt->errNo = XML_ERR_NO_MEMORY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "malloc: couldn't allocate a new input stream\n"); + ctxt->errNo = XML_ERR_NO_MEMORY; + } + return(NULL); + } + memset(input, 0, sizeof(xmlParserInput)); + input->line = 1; + input->col = 1; + input->standalone = -1; + return(input); +} + +/** + * xmlNewIOInputStream: + * @ctxt: an XML parser context + * @input: an I/O Input + * @enc: the charset encoding if known + * + * Create a new input stream structure encapsulating the @input into + * a stream suitable for the parser. 
+ * + * Returns the new input stream or NULL + */ +xmlParserInputPtr +xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, + xmlCharEncoding enc) { + xmlParserInputPtr inputStream; + + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); + inputStream = xmlNewInputStream(ctxt); + if (inputStream == NULL) { + return(NULL); + } + inputStream->filename = NULL; + inputStream->buf = input; + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + + return(inputStream); +} + +/** + * xmlNewEntityInputStream: + * @ctxt: an XML parser context + * @entity: an Entity pointer + * + * Create a new input stream based on an xmlEntityPtr + * + * Returns the new input stream or NULL + */ +xmlParserInputPtr +xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { + xmlParserInputPtr input; + + if (entity == NULL) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "internal: xmlNewEntityInputStream entity = NULL\n"); + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + return(NULL); + } + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "new input from entity: %s\n", entity->name); + if (entity->content == NULL) { + switch (entity->etype) { + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + ctxt->errNo = XML_ERR_UNPARSED_ENTITY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlNewEntityInputStream unparsed entity !\n"); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + return(xmlLoadExternalEntity((char *) entity->URI, + (char *) entity->ExternalID, ctxt)); + case XML_INTERNAL_GENERAL_ENTITY: + if ((ctxt->sax != NULL) && 
(ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Internal entity %s without content !\n", entity->name); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Internal parameter entity %s without content !\n", entity->name); + break; + case XML_INTERNAL_PREDEFINED_ENTITY: + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Predefined entity %s without content !\n", entity->name); + break; + } + return(NULL); + } + input = xmlNewInputStream(ctxt); + if (input == NULL) { + return(NULL); + } + input->filename = (char *) entity->URI; + input->base = entity->content; + input->cur = entity->content; + input->length = entity->length; + input->end = &entity->content[input->length]; + return(input); +} + +/** + * xmlNewStringInputStream: + * @ctxt: an XML parser context + * @buffer: an memory buffer + * + * Create a new input stream based on a memory buffer. + * Returns the new input stream + */ +xmlParserInputPtr +xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { + xmlParserInputPtr input; + + if (buffer == NULL) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "internal: xmlNewStringInputStream string = NULL\n"); + return(NULL); + } + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "new fixed input: %.30s\n", buffer); + input = xmlNewInputStream(ctxt); + if (input == NULL) { + return(NULL); + } + input->base = buffer; + input->cur = buffer; + input->length = xmlStrlen(buffer); + input->end = &buffer[input->length]; + return(input); +} + +/** + * xmlNewInputFromFile: + * @ctxt: an XML parser context + * @filename: the filename to use as entity + * + * Create a new input stream based on a file. 
+ * + * Returns the new input stream or NULL in case of error + */ +xmlParserInputPtr +xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { + xmlParserInputBufferPtr buf; + xmlParserInputPtr inputStream; + char *directory = NULL; + xmlChar *URI = NULL; + + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "new input from file: %s\n", filename); + if (ctxt == NULL) return(NULL); + buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); + if (buf == NULL) + return(NULL); + + URI = xmlStrdup((xmlChar *) filename); + directory = xmlParserGetDirectory((const char *) URI); + + inputStream = xmlNewInputStream(ctxt); + if (inputStream == NULL) { + if (directory != NULL) xmlFree((char *) directory); + if (URI != NULL) xmlFree((char *) URI); + return(NULL); + } + + inputStream->filename = (const char *) URI; + inputStream->directory = directory; + inputStream->buf = buf; + + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; + if ((ctxt->directory == NULL) && (directory != NULL)) + ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); + return(inputStream); +} + +/************************************************************************ + * * + * Commodity functions to handle parser contexts * + * * + ************************************************************************/ + +/** + * xmlInitParserCtxt: + * @ctxt: an XML parser context + * + * Initialize a parser context + */ + +void +xmlInitParserCtxt(xmlParserCtxtPtr ctxt) +{ + if(ctxt==NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitParserCtxt: NULL context given\n"); + return; + } + + xmlDefaultSAXHandlerInit(); + + ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); + if (ctxt->sax == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitParserCtxt: out of memory\n"); + } + else + 
memcpy(ctxt->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler)); + + /* Allocate the Input stack */ + ctxt->inputTab = (xmlParserInputPtr *) + xmlMalloc(5 * sizeof(xmlParserInputPtr)); + if (ctxt->inputTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitParserCtxt: out of memory\n"); + ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + return; + } + ctxt->inputNr = 0; + ctxt->inputMax = 5; + ctxt->input = NULL; + + ctxt->version = NULL; + ctxt->encoding = NULL; + ctxt->standalone = -1; + ctxt->hasExternalSubset = 0; + ctxt->hasPErefs = 0; + ctxt->html = 0; + ctxt->external = 0; + ctxt->instate = XML_PARSER_START; + ctxt->token = 0; + ctxt->directory = NULL; + + /* Allocate the Node stack */ + ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); + if (ctxt->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitParserCtxt: out of memory\n"); + ctxt->nodeNr = 0; + ctxt->nodeMax = 0; + ctxt->node = NULL; + ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + return; + } + ctxt->nodeNr = 0; + ctxt->nodeMax = 10; + ctxt->node = NULL; + + /* Allocate the Name stack */ + ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); + if (ctxt->nameTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitParserCtxt: out of memory\n"); + ctxt->nodeNr = 0; + ctxt->nodeMax = 0; + ctxt->node = NULL; + ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + ctxt->nameNr = 0; + ctxt->nameMax = 0; + ctxt->name = NULL; + return; + } + ctxt->nameNr = 0; + ctxt->nameMax = 10; + ctxt->name = NULL; + + /* Allocate the space stack */ + ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); + if (ctxt->spaceTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitParserCtxt: out of memory\n"); + ctxt->nodeNr = 0; + ctxt->nodeMax = 0; + ctxt->node = NULL; + ctxt->inputNr = 0; + ctxt->inputMax = 0; + ctxt->input = NULL; + ctxt->nameNr = 0; + ctxt->nameMax = 0; + ctxt->name = NULL; + 
ctxt->spaceNr = 0; + ctxt->spaceMax = 0; + ctxt->space = NULL; + return; + } + ctxt->spaceNr = 1; + ctxt->spaceMax = 10; + ctxt->spaceTab[0] = -1; + ctxt->space = &ctxt->spaceTab[0]; + ctxt->userData = ctxt; + ctxt->myDoc = NULL; + ctxt->wellFormed = 1; + ctxt->valid = 1; + ctxt->loadsubset = xmlLoadExtDtdDefaultValue; + ctxt->validate = xmlDoValidityCheckingDefaultValue; + ctxt->pedantic = xmlPedanticParserDefaultValue; + ctxt->linenumbers = xmlLineNumbersDefaultValue; + ctxt->keepBlanks = xmlKeepBlanksDefaultValue; + if (ctxt->keepBlanks == 0) + ctxt->sax->ignorableWhitespace = ignorableWhitespace; + + ctxt->vctxt.userData = ctxt; + ctxt->vctxt.error = xmlParserValidityError; + ctxt->vctxt.warning = xmlParserValidityWarning; + if (ctxt->validate) { + if (xmlGetWarningsDefaultValue == 0) + ctxt->vctxt.warning = NULL; + else + ctxt->vctxt.warning = xmlParserValidityWarning; + ctxt->vctxt.nodeMax = 0; + } + ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; + ctxt->record_info = 0; + ctxt->nbChars = 0; + ctxt->checkIndex = 0; + ctxt->inSubset = 0; + ctxt->errNo = XML_ERR_OK; + ctxt->depth = 0; + ctxt->charset = XML_CHAR_ENCODING_UTF8; + ctxt->catalogs = NULL; + xmlInitNodeInfoSeq(&ctxt->node_seq); +} + +/** + * xmlFreeParserCtxt: + * @ctxt: an XML parser context + * + * Free all the memory used by a parser context. However the parsed + * document in ctxt->myDoc is not freed. 
+ */ + +void +xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) +{ + xmlParserInputPtr input; + xmlChar *oldname; + + if (ctxt == NULL) return; + + while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ + xmlFreeInputStream(input); + } + while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */ + xmlFree(oldname); + } + if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); + if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab); + if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); + if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); + if (ctxt->version != NULL) xmlFree((char *) ctxt->version); + if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); + if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName); + if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); + if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); + if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler)) + xmlFree(ctxt->sax); + if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); + if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); +#ifdef LIBXML_CATALOG_ENABLED + if (ctxt->catalogs != NULL) + xmlCatalogFreeLocal(ctxt->catalogs); +#endif + xmlFree(ctxt); +} + +/** + * xmlNewParserCtxt: + * + * Allocate and initialize a new parser context. 
+ * + * Returns the xmlParserCtxtPtr or NULL + */ + +xmlParserCtxtPtr +xmlNewParserCtxt() +{ + xmlParserCtxtPtr ctxt; + + ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewParserCtxt : cannot allocate context\n"); + xmlGenericError(xmlGenericErrorContext, "malloc failed"); + return(NULL); + } + memset(ctxt, 0, sizeof(xmlParserCtxt)); + xmlInitParserCtxt(ctxt); + return(ctxt); +} + +/************************************************************************ + * * + * Handling of node informations * + * * + ************************************************************************/ + +/** + * xmlClearParserCtxt: + * @ctxt: an XML parser context + * + * Clear (release owned resources) and reinitialize a parser context + */ + +void +xmlClearParserCtxt(xmlParserCtxtPtr ctxt) +{ + if (ctxt==NULL) + return; + xmlClearNodeInfoSeq(&ctxt->node_seq); + xmlInitParserCtxt(ctxt); +} + +/** + * xmlParserFindNodeInfo: + * @ctx: an XML parser context + * @node: an XML node within the tree + * + * Find the parser node info struct for a given node + * + * Returns an xmlParserNodeInfo block pointer or NULL + */ +const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, + const xmlNodePtr node) +{ + unsigned long pos; + + /* Find position where node should be at */ + pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); + if (pos < ctx->node_seq.length && ctx->node_seq.buffer[pos].node == node) + return &ctx->node_seq.buffer[pos]; + else + return NULL; +} + + +/** + * xmlInitNodeInfoSeq: + * @seq: a node info sequence pointer + * + * -- Initialize (set to initial state) node info sequence + */ +void +xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) +{ + seq->length = 0; + seq->maximum = 0; + seq->buffer = NULL; +} + +/** + * xmlClearNodeInfoSeq: + * @seq: a node info sequence pointer + * + * -- Clear (release memory and reinitialize) node + * info sequence + */ +void 
+xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) +{ + if ( seq->buffer != NULL ) + xmlFree(seq->buffer); + xmlInitNodeInfoSeq(seq); +} + + +/** + * xmlParserFindNodeInfoIndex: + * @seq: a node info sequence pointer + * @node: an XML node pointer + * + * + * xmlParserFindNodeInfoIndex : Find the index that the info record for + * the given node is or should be at in a sorted sequence + * + * Returns a long indicating the position of the record + */ +unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, + const xmlNodePtr node) +{ + unsigned long upper, lower, middle; + int found = 0; + + /* Do a binary search for the key */ + lower = 1; + upper = seq->length; + middle = 0; + while ( lower <= upper && !found) { + middle = lower + (upper - lower) / 2; + if ( node == seq->buffer[middle - 1].node ) + found = 1; + else if ( node < seq->buffer[middle - 1].node ) + upper = middle - 1; + else + lower = middle + 1; + } + + /* Return position */ + if ( middle == 0 || seq->buffer[middle - 1].node < node ) + return middle; + else + return middle - 1; +} + + +/** + * xmlParserAddNodeInfo: + * @ctxt: an XML parser context + * @info: a node info sequence pointer + * + * Insert node info record into the sorted sequence + */ +void +xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, + const xmlParserNodeInfoPtr info) +{ + unsigned long pos; + + /* Find pos and check to see if node is already in the sequence */ + pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (const xmlNodePtr) + info->node); + if (pos < ctxt->node_seq.length + && ctxt->node_seq.buffer[pos].node == info->node) { + ctxt->node_seq.buffer[pos] = *info; + } + + /* Otherwise, we need to add new node to buffer */ + else { + if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { + xmlParserNodeInfo *tmp_buffer; + unsigned int byte_size; + + if (ctxt->node_seq.maximum == 0) + ctxt->node_seq.maximum = 2; + byte_size = (sizeof(*ctxt->node_seq.buffer) * + (2 * ctxt->node_seq.maximum)); + + if 
(ctxt->node_seq.buffer == NULL) + tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); + else + tmp_buffer = + (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, + byte_size); + + if (tmp_buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Out of memory\n"); + ctxt->errNo = XML_ERR_NO_MEMORY; + return; + } + ctxt->node_seq.buffer = tmp_buffer; + ctxt->node_seq.maximum *= 2; + } + + /* If position is not at end, move elements out of the way */ + if (pos != ctxt->node_seq.length) { + unsigned long i; + + for (i = ctxt->node_seq.length; i > pos; i--) + ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; + } + + /* Copy element and increase length */ + ctxt->node_seq.buffer[pos] = *info; + ctxt->node_seq.length++; + } +} + +/************************************************************************ + * * + * Defaults settings * + * * + ************************************************************************/ +/** + * xmlPedanticParserDefault: + * @val: int 0 or 1 + * + * Set and return the previous value for enabling pedantic warnings. + * + * Returns the last value for 0 for no substitution, 1 for substitution. + */ + +int +xmlPedanticParserDefault(int val) { + int old = xmlPedanticParserDefaultValue; + + xmlPedanticParserDefaultValue = val; + return(old); +} + +/** + * xmlLineNumbersDefault: + * @val: int 0 or 1 + * + * Set and return the previous value for enabling line numbers in elements + * contents. This may break on old application and is turned off by default. + * + * Returns the last value for 0 for no substitution, 1 for substitution. + */ + +int +xmlLineNumbersDefault(int val) { + int old = xmlLineNumbersDefaultValue; + + xmlLineNumbersDefaultValue = val; + return(old); +} + +/** + * xmlSubstituteEntitiesDefault: + * @val: int 0 or 1 + * + * Set and return the previous value for default entity support. 
+ * Initially the parser always keep entity references instead of substituting + * entity values in the output. This function has to be used to change the + * default parser behavior + * SAX::substituteEntities() has to be used for changing that on a file by + * file basis. + * + * Returns the last value for 0 for no substitution, 1 for substitution. + */ + +int +xmlSubstituteEntitiesDefault(int val) { + int old = xmlSubstituteEntitiesDefaultValue; + + xmlSubstituteEntitiesDefaultValue = val; + return(old); +} + +/** + * xmlKeepBlanksDefault: + * @val: int 0 or 1 + * + * Set and return the previous value for default blanks text nodes support. + * The 1.x version of the parser used an heuristic to try to detect + * ignorable white spaces. As a result the SAX callback was generating + * ignorableWhitespace() callbacks instead of characters() one, and when + * using the DOM output text nodes containing those blanks were not generated. + * The 2.x and later version will switch to the XML standard way and + * ignorableWhitespace() are only generated when running the parser in + * validating mode and when the current element doesn't allow CDATA or + * mixed content. + * This function is provided as a way to force the standard behavior + * on 1.X libs and to switch back to the old mode for compatibility when + * running 1.X client code on 2.X . Upgrade of 1.X code should be done + * by using xmlIsBlankNode() commodity function to detect the "empty" + * nodes generated. + * This value also affect autogeneration of indentation when saving code + * if blanks sections are kept, indentation is not generated. + * + * Returns the last value for 0 for no substitution, 1 for substitution. 
+ */ + +int +xmlKeepBlanksDefault(int val) { + int old = xmlKeepBlanksDefaultValue; + + xmlKeepBlanksDefaultValue = val; + xmlIndentTreeOutput = !val; + return(old); +} + +/************************************************************************ + * * + * Deprecated functions kept for compatibility * + * * + ************************************************************************/ + +/** + * xmlCheckLanguageID: + * @lang: pointer to the string value + * + * Checks that the value conforms to the LanguageID production: + * + * NOTE: this is somewhat deprecated, those productions were removed from + * the XML Second edition. + * + * [33] LanguageID ::= Langcode ('-' Subcode)* + * [34] Langcode ::= ISO639Code | IanaCode | UserCode + * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) + * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ + * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ + * [38] Subcode ::= ([a-z] | [A-Z])+ + * + * Returns 1 if correct 0 otherwise + **/ +int +xmlCheckLanguageID(const xmlChar *lang) { + const xmlChar *cur = lang; + + if (cur == NULL) + return(0); + if (((cur[0] == 'i') && (cur[1] == '-')) || + ((cur[0] == 'I') && (cur[1] == '-'))) { + /* + * IANA code + */ + cur += 2; + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ + ((cur[0] >= 'a') && (cur[0] <= 'z'))) + cur++; + } else if (((cur[0] == 'x') && (cur[1] == '-')) || + ((cur[0] == 'X') && (cur[1] == '-'))) { + /* + * User code + */ + cur += 2; + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ + ((cur[0] >= 'a') && (cur[0] <= 'z'))) + cur++; + } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || + ((cur[0] >= 'a') && (cur[0] <= 'z'))) { + /* + * ISO639 + */ + cur++; + if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || + ((cur[0] >= 'a') && (cur[0] <= 'z'))) + cur++; + else + return(0); + } else + return(0); + while (cur[0] != 0) { /* non input consuming */ + if (cur[0] != '-') + return(0); + cur++; + if (((cur[0] >= 'A') && (cur[0] <= 'Z')) 
|| + ((cur[0] >= 'a') && (cur[0] <= 'z'))) + cur++; + else + return(0); + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ + ((cur[0] >= 'a') && (cur[0] <= 'z'))) + cur++; + } + return(1); +} + +/** + * xmlDecodeEntities: + * @ctxt: the parser context + * @len: the len to decode (in bytes !), -1 for no size limit + * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * This function is deprecated, we now always process entities content + * through xmlStringDecodeEntities + * + * TODO: remove it in next major release. + * + * [67] Reference ::= EntityRef | CharRef + * + * [69] PEReference ::= '%' Name ';' + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED, + xmlChar end ATTRIBUTE_UNUSED, xmlChar end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) { +#if 0 + xmlChar *buffer = NULL; + unsigned int buffer_size = 0; + unsigned int nbchars = 0; + + xmlChar *current = NULL; + xmlEntityPtr ent; + unsigned int max = (unsigned int) len; + int c,l; +#endif + + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlDecodeEntities() deprecated function reached\n"); + deprecated = 1; + } + +#if 0 + if (ctxt->depth > 40) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Detected entity reference loop\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->errNo = XML_ERR_ENTITY_LOOP; + return(NULL); + } + + /* + * allocate a translation buffer. 
+ */ + buffer_size = XML_PARSER_BIG_BUFFER_SIZE; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlDecodeEntities: malloc failed"); + return(NULL); + } + + /* + * OK loop until we reach one of the ending char or a size limit. + */ + GROW; + c = CUR_CHAR(l); + while ((nbchars < max) && (c != end) && /* NOTUSED */ + (c != end2) && (c != end3)) { + GROW; + if (c == 0) break; + if ((c == '&') && (NXT(1) == '#')) { + int val = xmlParseCharRef(ctxt); + COPY_BUF(0,buffer,nbchars,val); + NEXTL(l); + } else if (c == '&') && + (what & XML_SUBSTITUTE_REF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "decoding Entity Reference\n"); + ent = xmlParseEntityRef(ctxt); + if ((ent != NULL) && + (ctxt->replaceEntities != 0)) { + current = ent->content; + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + } else if (ent != NULL) { + const xmlChar *cur = ent->name; + + buffer[nbchars++] = '&'; + if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + while (*cur != 0) { /* non input consuming loop */ + buffer[nbchars++] = *cur++; + } + buffer[nbchars++] = ';'; + } + } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { + /* + * a PEReference induce to switch the entity flow, + * we break here to flush the current set of chars + * parsed if any. We will be called back later. + */ + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "decoding PE Reference\n"); + if (nbchars != 0) break; + + xmlParsePEReference(ctxt); + + /* + * Pop-up of finished entities. 
+ */ + while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ + xmlPopInput(ctxt); + + break; + } else { + COPY_BUF(l,buffer,nbchars,c); + NEXTL(l); + if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + c = CUR_CHAR(l); + } + buffer[nbchars++] = 0; + return(buffer); +#endif + return(NULL); +} + +/** + * xmlNamespaceParseNCName: + * @ctxt: an XML parser context + * + * parse an XML namespace name. + * + * TODO: this seems not in use anymore, the namespace handling is done on + * top of the SAX interfaces, i.e. not on raw input. + * + * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* + * + * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + * + * Returns the namespace name or NULL + */ + +xmlChar * +xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { +#if 0 + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int cur = CUR_CHAR(l); +#endif + + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlNamespaceParseNCName() deprecated function reached\n"); + deprecated = 1; + } + +#if 0 + /* load first the value of the char !!! 
*/ + GROW; + if (!IS_LETTER(cur) && (cur != '_')) return(NULL); + +xmlGenericError(xmlGenericErrorContext, + "xmlNamespaceParseNCName: reached loop 3\n"); + while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */ + (cur == '.') || (cur == '-') || + (cur == '_') || + (IS_COMBINING(cur)) || + (IS_EXTENDER(cur))) { + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (len >= XML_MAX_NAMELEN) { + xmlGenericError(xmlGenericErrorContext, + "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n"); + while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */ + (cur == '.') || (cur == '-') || + (cur == '_') || + (IS_COMBINING(cur)) || + (IS_EXTENDER(cur))) { + NEXTL(l); + cur = CUR_CHAR(l); + } + break; + } + } + return(xmlStrndup(buf, len)); +#endif + return(NULL); +} + +/** + * xmlNamespaceParseQName: + * @ctxt: an XML parser context + * @prefix: a xmlChar ** + * + * TODO: this seems not in use anymore, the namespace handling is done on + * top of the SAX interfaces, i.e. not on raw input. + * + * parse an XML qualified name + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns the local part, and prefix is updated + * to get the Prefix if any. 
+ */ + +xmlChar * +xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) { + + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlNamespaceParseQName() deprecated function reached\n"); + deprecated = 1; + } + +#if 0 + xmlChar *ret = NULL; + + *prefix = NULL; + ret = xmlNamespaceParseNCName(ctxt); + if (RAW == ':') { + *prefix = ret; + NEXT; + ret = xmlNamespaceParseNCName(ctxt); + } + + return(ret); +#endif + return(NULL); +} + +/** + * xmlNamespaceParseNSDef: + * @ctxt: an XML parser context + * + * parse a namespace prefix declaration + * + * TODO: this seems not in use anymore, the namespace handling is done on + * top of the SAX interfaces, i.e. not on raw input. + * + * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral + * + * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? + * + * Returns the namespace name + */ + +xmlChar * +xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlNamespaceParseNSDef() deprecated function reached\n"); + deprecated = 1; + } + return(NULL); +#if 0 + xmlChar *name = NULL; + + if ((RAW == 'x') && (NXT(1) == 'm') && + (NXT(2) == 'l') && (NXT(3) == 'n') && + (NXT(4) == 's')) { + SKIP(5); + if (RAW == ':') { + NEXT; + name = xmlNamespaceParseNCName(ctxt); + } + } + return(name); +#endif +} + +/** + * xmlParseQuotedString: + * @ctxt: an XML parser context + * + * Parse and return a string between quotes or doublequotes + * + * TODO: Deprecated, to be removed at next drop of binary compatibility + * + * Returns the string parser or NULL. 
+ */ +xmlChar * +xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlParseQuotedString() deprecated function reached\n"); + deprecated = 1; + } + return(NULL); + +#if 0 + xmlChar *buf = NULL; + int len = 0,l; + int size = XML_PARSER_BUFFER_SIZE; + int c; + + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } +xmlGenericError(xmlGenericErrorContext, + "xmlParseQuotedString: reached loop 4\n"); + if (RAW == '"') { + NEXT; + c = CUR_CHAR(l); + while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */ + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + if (c != '"') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "String not closed \"%.50s\"\n", buf); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + NEXT; + } + } else if (RAW == '\''){ + NEXT; + c = CUR; + while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */ + if (len + 1 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + buf[len++] = c; + NEXT; + c = CUR; + } + if (RAW != '\'') { + ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "String not closed \"%.50s\"\n", buf); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + } else { + NEXT; + } + } + return(buf); +#endif +} + +/** + 
* xmlParseNamespace: + * @ctxt: an XML parser context + * + * xmlParseNamespace: parse specific PI '<?namespace ...' constructs. + * + * This is what the older xml-name Working Draft specified, a bunch of + * other stuff may still rely on it, so support is still here as + * if it was declared on the root of the Tree:-( + * + * TODO: remove from library + * + * To be removed at next drop of binary compatibility + */ + +void +xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlParseNamespace() deprecated function reached\n"); + deprecated = 1; + } + +#if 0 + xmlChar *href = NULL; + xmlChar *prefix = NULL; + int garbage = 0; + + /* + * We just skipped "namespace" or "xml:namespace" + */ + SKIP_BLANKS; + +xmlGenericError(xmlGenericErrorContext, + "xmlParseNamespace: reached loop 5\n"); + while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */ + /* + * We can have "ns" or "prefix" attributes + * Old encoding as 'href' or 'AS' attributes is still supported + */ + if ((RAW == 'n') && (NXT(1) == 's')) { + garbage = 0; + SKIP(2); + SKIP_BLANKS; + + if (RAW != '=') continue; + NEXT; + SKIP_BLANKS; + + href = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((RAW == 'h') && (NXT(1) == 'r') && + (NXT(2) == 'e') && (NXT(3) == 'f')) { + garbage = 0; + SKIP(4); + SKIP_BLANKS; + + if (RAW != '=') continue; + NEXT; + SKIP_BLANKS; + + href = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((RAW == 'p') && (NXT(1) == 'r') && + (NXT(2) == 'e') && (NXT(3) == 'f') && + (NXT(4) == 'i') && (NXT(5) == 'x')) { + garbage = 0; + SKIP(6); + SKIP_BLANKS; + + if (RAW != '=') continue; + NEXT; + SKIP_BLANKS; + + prefix = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((RAW == 'A') && (NXT(1) == 'S')) { + garbage = 0; + SKIP(2); + SKIP_BLANKS; + + if (RAW != '=') continue; + NEXT; + SKIP_BLANKS; + + prefix = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((RAW == 
'?') && (NXT(1) == '>')) { + garbage = 0; + NEXT; + } else { + /* + * Found garbage when parsing the namespace + */ + if (!garbage) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNamespace found garbage\n"); + } + ctxt->errNo = XML_ERR_NS_DECL_ERROR; + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + NEXT; + } + } + + MOVETO_ENDTAG(CUR_PTR); + NEXT; + + /* + * Register the DTD. + if (href != NULL) + if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL)) + ctxt->sax->globalNamespace(ctxt->userData, href, prefix); + */ + + if (prefix != NULL) xmlFree(prefix); + if (href != NULL) xmlFree(href); +#endif +} + +/** + * xmlScanName: + * @ctxt: an XML parser context + * + * Trickery: parse an XML name but without consuming the input flow + * Needed for rollback cases. Used only when parsing entities references. + * + * TODO: seems deprecated now, only used in the default part of + * xmlParserHandleReference + * + * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL + */ + +xmlChar * +xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanName() deprecated function reached\n"); + deprecated = 1; + } + return(NULL); + +#if 0 + xmlChar buf[XML_MAX_NAMELEN]; + int len = 0; + + GROW; + if (!IS_LETTER(RAW) && (RAW != '_') && + (RAW != ':')) { + return(NULL); + } + + + while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */ + (NXT(len) == '.') || (NXT(len) == '-') || + (NXT(len) == '_') || (NXT(len) == ':') || + (IS_COMBINING(NXT(len))) || + (IS_EXTENDER(NXT(len)))) { + GROW; + buf[len] = NXT(len); + len++; + if (len >= XML_MAX_NAMELEN) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanName: reached XML_MAX_NAMELEN limit\n"); + while ((IS_LETTER(NXT(len))) || /* NOT REACHED */ + (IS_DIGIT(NXT(len))) || + (NXT(len) == '.') || (NXT(len) == '-') || + (NXT(len) == '_') || (NXT(len) == ':') || + (IS_COMBINING(NXT(len))) || + (IS_EXTENDER(NXT(len)))) + len++; + break; + } + } + return(xmlStrndup(buf, len)); +#endif +} + +/** + * xmlParserHandleReference: + * @ctxt: the parser context + * + * TODO: Remove, now deprecated ... the test is done directly in the + * content parsing + * routines. + * + * [67] Reference ::= EntityRef | CharRef + * + * [68] EntityRef ::= '&' Name ';' + * + * [ WFC: Entity Declared ] + * the Name given in the entity reference must match that in an entity + * declaration, except that well-formed documents need not declare any + * of the following entities: amp, lt, gt, apos, quot. 
+ * + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an unparsed entity + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * A PEReference may have been detected in the current input stream + * the handling is done accordingly to + * http://www.w3.org/TR/REC-xml#entproc + */ +void +xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserHandleReference() deprecated function reached\n"); + deprecated = 1; + } + + return; +} + +/** + * xmlHandleEntity: + * @ctxt: an XML parser context + * @entity: an XML entity pointer. + * + * Default handling of defined entities, when should we define a new input + * stream ? When do we just handle that as a set of chars ? + * + * OBSOLETE: to be removed at some point. + */ + +void +xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlHandleEntity() deprecated function reached\n"); + deprecated = 1; + } + +#if 0 + int len; + xmlParserInputPtr input; + + if (entity->content == NULL) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n", + entity->name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + return; + } + len = xmlStrlen(entity->content); + if (len <= 2) goto handle_as_char; + + /* + * Redefine its content as an input stream. + */ + input = xmlNewEntityInputStream(ctxt, entity); + xmlPushInput(ctxt, input); + return; + +handle_as_char: + /* + * Just handle the content as a set of chars. 
+ */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, entity->content, len); +#endif +} + +/** + * xmlNewGlobalNs: + * @doc: the document carrying the namespace + * @href: the URI associated + * @prefix: the prefix for the namespace + * + * Creation of a Namespace, the old way using PI and without scoping + * DEPRECATED !!! + * It now create a namespace on the root element of the document if found. + * Returns NULL this functionality had been removed + */ +xmlNsPtr +xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED, + const xmlChar *prefix ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewGlobalNs() deprecated function reached\n"); + deprecated = 1; + } + return(NULL); +#if 0 + xmlNodePtr root; + + xmlNsPtr cur; + + root = xmlDocGetRootElement(doc); + if (root != NULL) + return(xmlNewNs(root, href, prefix)); + + /* + * if there is no root element yet, create an old Namespace type + * and it will be moved to the root at save time. + */ + cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewGlobalNs : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNs)); + cur->type = XML_GLOBAL_NAMESPACE; + + if (href != NULL) + cur->href = xmlStrdup(href); + if (prefix != NULL) + cur->prefix = xmlStrdup(prefix); + + /* + * Add it at the end to preserve parsing order ... + */ + if (doc != NULL) { + if (doc->oldNs == NULL) { + doc->oldNs = cur; + } else { + xmlNsPtr prev = doc->oldNs; + + while (prev->next != NULL) prev = prev->next; + prev->next = cur; + } + } + + return(NULL); +#endif +} + +/** + * xmlUpgradeOldNs: + * @doc: a document pointer + * + * Upgrade old style Namespaces (PI) and move them to the root of the document. 
+ * DEPRECATED + */ +void +xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) { + static int deprecated = 0; + if (!deprecated) { + xmlGenericError(xmlGenericErrorContext, + "xmlUpgradeOldNs() deprecated function reached\n"); + deprecated = 1; + } +#if 0 + xmlNsPtr cur; + + if ((doc == NULL) || (doc->oldNs == NULL)) return; + if (doc->children == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlUpgradeOldNs: failed no root !\n"); +#endif + return; + } + + cur = doc->oldNs; + while (cur->next != NULL) { + cur->type = XML_LOCAL_NAMESPACE; + cur = cur->next; + } + cur->type = XML_LOCAL_NAMESPACE; + cur->next = doc->children->nsDef; + doc->children->nsDef = doc->oldNs; + doc->oldNs = NULL; +#endif +} + diff --git a/bundle/libxml/stamp-h.in b/bundle/libxml/stamp-h.in new file mode 100644 index 0000000000..9788f70238 --- /dev/null +++ b/bundle/libxml/stamp-h.in @@ -0,0 +1 @@ +timestamp diff --git a/bundle/libxml/threads.c b/bundle/libxml/threads.c new file mode 100644 index 0000000000..6aa5ecc307 --- /dev/null +++ b/bundle/libxml/threads.c @@ -0,0 +1,537 @@ +/** + * threads.c: set of generic threading related routines + * + * See Copyright for the status of this software. 
+ * + * Gary Pennington <Gary.Pennington@uk.sun.com> + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> + +#include <libxml/threads.h> +#include <libxml/globals.h> + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_PTHREAD_H +#include <pthread.h> +#endif + +#ifdef HAVE_WIN32_THREADS +#include <windows.h> +#ifndef HAVE_COMPILER_TLS +#include <process.h> +#endif +#endif + +#if defined(SOLARIS) +#include <note.h> +#endif + +/* #define DEBUG_THREADS */ + +/* + * TODO: this module still uses malloc/free and not xmlMalloc/xmlFree + * to avoid some crazyness since xmlMalloc/xmlFree may actually + * be hosted on allocated blocks needing them for the allocation ... + */ + +/* + * xmlMutex are a simple mutual exception locks + */ +struct _xmlMutex { +#ifdef HAVE_PTHREAD_H + pthread_mutex_t lock; +#elif defined HAVE_WIN32_THREADS + HANDLE mutex; +#else + int empty; +#endif +}; + +/* + * xmlRMutex are reentrant mutual exception locks + */ +struct _xmlRMutex { +#ifdef HAVE_PTHREAD_H + pthread_mutex_t lock; + unsigned int held; + unsigned int waiters; + pthread_t tid; + pthread_cond_t cv; +#elif defined HAVE_WIN32_THREADS + CRITICAL_SECTION cs; + unsigned int count; +#else + int empty; +#endif +}; +/* + * This module still has some internal static data. 
+ * - xmlLibraryLock a global lock + * - globalkey used for per-thread data + */ + +#ifdef HAVE_PTHREAD_H +static pthread_key_t globalkey; +static pthread_t mainthread; +static pthread_once_t once_control = PTHREAD_ONCE_INIT; +#elif defined HAVE_WIN32_THREADS +#if defined(HAVE_COMPILER_TLS) +static __declspec(thread) xmlGlobalState tlstate; +static __declspec(thread) int tlstate_inited = 0; +#else /* HAVE_COMPILER_TLS */ +static DWORD globalkey; +#endif /* HAVE_COMPILER_TLS */ +static DWORD mainthread; +static int run_once_init = 1; +#endif /* HAVE_WIN32_THREADS */ + +static xmlRMutexPtr xmlLibraryLock = NULL; +static void xmlOnceInit(void); + +/** + * xmlNewMutex: + * + * xmlNewMutex() is used to allocate a libxml2 token struct for use in + * synchronizing access to data. + * + * Returns a new simple mutex pointer or NULL in case of error + */ +xmlMutexPtr +xmlNewMutex(void) +{ + xmlMutexPtr tok; + + if ((tok = malloc(sizeof(xmlMutex))) == NULL) + return (NULL); +#ifdef HAVE_PTHREAD_H + pthread_mutex_init(&tok->lock, NULL); +#elif defined HAVE_WIN32_THREADS + tok->mutex = CreateMutex(NULL, FALSE, NULL); +#endif + return (tok); +} + +/** + * xmlFreeMutex: + * @tok: the simple mutex + * + * xmlFreeMutex() is used to reclaim resources associated with a libxml2 token + * struct. + */ +void +xmlFreeMutex(xmlMutexPtr tok) +{ +#ifdef HAVE_PTHREAD_H + pthread_mutex_destroy(&tok->lock); +#elif defined HAVE_WIN32_THREADS + CloseHandle(tok->mutex); +#endif + free(tok); +} + +/** + * xmlMutexLock: + * @tok: the simple mutex + * + * xmlMutexLock() is used to lock a libxml2 token. + */ +void +xmlMutexLock(xmlMutexPtr tok ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_PTHREAD_H + pthread_mutex_lock(&tok->lock); +#elif defined HAVE_WIN32_THREADS + WaitForSingleObject(tok->mutex, INFINITE); +#endif + +} + +/** + * xmlMutexUnlock: + * @tok: the simple mutex + * + * xmlMutexUnlock() is used to unlock a libxml2 token. 
+ */ +void +xmlMutexUnlock(xmlMutexPtr tok ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_PTHREAD_H + pthread_mutex_unlock(&tok->lock); +#elif defined HAVE_WIN32_THREADS + ReleaseMutex(tok->mutex); +#endif +} + +/** + * xmlNewRMutex: + * + * xmlRNewMutex() is used to allocate a reentrant mutex for use in + * synchronizing access to data. token_r is a re-entrant lock and thus useful + * for synchronizing access to data structures that may be manipulated in a + * recursive fashion. + * + * Returns the new reentrant mutex pointer or NULL in case of error + */ +xmlRMutexPtr +xmlNewRMutex(void) +{ + xmlRMutexPtr tok; + + if ((tok = malloc(sizeof(xmlRMutex))) == NULL) + return (NULL); +#ifdef HAVE_PTHREAD_H + pthread_mutex_init(&tok->lock, NULL); + tok->held = 0; + tok->waiters = 0; +#elif defined HAVE_WIN32_THREADS + InitializeCriticalSection(&tok->cs); + tok->count = 0; +#endif + return (tok); +} + +/** + * xmlFreeRMutex: + * @tok: the reentrant mutex + * + * xmlRFreeMutex() is used to reclaim resources associated with a + * reentrant mutex. + */ +void +xmlFreeRMutex(xmlRMutexPtr tok ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_PTHREAD_H + pthread_mutex_destroy(&tok->lock); +#elif defined HAVE_WIN32_THREADS + DeleteCriticalSection(&tok->cs); +#endif + free(tok); +} + +/** + * xmlRMutexLock: + * @tok: the reentrant mutex + * + * xmlRMutexLock() is used to lock a libxml2 token_r. 
+ */ +void +xmlRMutexLock(xmlRMutexPtr tok ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_PTHREAD_H + pthread_mutex_lock(&tok->lock); + if (tok->held) { + if (pthread_equal(tok->tid, pthread_self())) { + tok->held++; + pthread_mutex_unlock(&tok->lock); + return; + } else { + tok->waiters++; + while (tok->held) + pthread_cond_wait(&tok->cv, &tok->lock); + tok->waiters--; + } + } + tok->tid = pthread_self(); + tok->held = 1; + pthread_mutex_unlock(&tok->lock); +#elif defined HAVE_WIN32_THREADS + EnterCriticalSection(&tok->cs); + ++tok->count; +#endif +} + +/** + * xmlRMutexUnlock: + * @tok: the reentrant mutex + * + * xmlRMutexUnlock() is used to unlock a libxml2 token_r. + */ +void +xmlRMutexUnlock(xmlRMutexPtr tok ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_PTHREAD_H + pthread_mutex_lock(&tok->lock); + tok->held--; + if (tok->held == 0) { + if (tok->waiters) + pthread_cond_signal(&tok->cv); + tok->tid = 0; + } + pthread_mutex_unlock(&tok->lock); +#elif defined HAVE_WIN32_THREADS + if (!--tok->count) + LeaveCriticalSection(&tok->cs); +#endif +} + +/************************************************************************ + * * + * Per thread global state handling * + * * + ************************************************************************/ + +#ifdef LIBXML_THREAD_ENABLED +/** + * xmlFreeGlobalState: + * @state: a thread global state + * + * xmlFreeGlobalState() is called when a thread terminates with a non-NULL + * global state. It is is used here to reclaim memory resources. + */ +static void +xmlFreeGlobalState(void *state) +{ + free(state); +} + +/** + * xmlNewGlobalState: + * + * xmlNewGlobalState() allocates a global state. This structure is used to + * hold all data for use by a thread when supporting backwards compatibility + * of libxml2 to pre-thread-safe behaviour. 
+ * + * Returns the newly allocated xmlGlobalStatePtr or NULL in case of error + */ +static xmlGlobalStatePtr +xmlNewGlobalState(void) +{ + xmlGlobalState *gs; + + gs = malloc(sizeof(xmlGlobalState)); + if (gs == NULL) + return(NULL); + + memset(gs, 0, sizeof(xmlGlobalState)); + xmlInitializeGlobalState(gs); + return (gs); +} +#endif /* LIBXML_THREAD_ENABLED */ + + +#ifdef HAVE_WIN32_THREADS +#if !defined(HAVE_COMPILER_TLS) +typedef struct _xmlGlobalStateCleanupHelperParams +{ + HANDLE thread; + void *memory; +} xmlGlobalStateCleanupHelperParams; + +static void xmlGlobalStateCleanupHelper (void *p) +{ + xmlGlobalStateCleanupHelperParams *params = (xmlGlobalStateCleanupHelperParams *) p; + WaitForSingleObject(params->thread, INFINITE); + CloseHandle(params->thread); + xmlFreeGlobalState(params->memory); + free(params); + _endthread(); +} +#endif /* HAVE_COMPILER_TLS */ +#endif /* HAVE_WIN32_THREADS */ + +/** + * xmlGetGlobalState: + * + * xmlGetGlobalState() is called to retrieve the global state for a thread. 
+ * + * Returns the thread global state or NULL in case of error + */ +xmlGlobalStatePtr +xmlGetGlobalState(void) +{ +#ifdef HAVE_PTHREAD_H + xmlGlobalState *globalval; + + pthread_once(&once_control, xmlOnceInit); + + if ((globalval = (xmlGlobalState *) + pthread_getspecific(globalkey)) == NULL) { + xmlGlobalState *tsd = xmlNewGlobalState(); + + pthread_setspecific(globalkey, tsd); + return (tsd); + } + return (globalval); +#elif defined HAVE_WIN32_THREADS +#if defined(HAVE_COMPILER_TLS) + if (!tlstate_inited) { + tlstate_inited = 1; + xmlInitializeGlobalState(&tlstate); + } + return &tlstate; +#else /* HAVE_COMPILER_TLS */ + xmlGlobalState *globalval; + + if (run_once_init) { + run_once_init = 0; + xmlOnceInit(); + } + if ((globalval = (xmlGlobalState *) TlsGetValue(globalkey)) == NULL) { + xmlGlobalState *tsd = xmlNewGlobalState(); + xmlGlobalStateCleanupHelperParams *p = + (xmlGlobalStateCleanupHelperParams *) malloc(sizeof(xmlGlobalStateCleanupHelperParams)); + p->memory = tsd; + DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &p->thread, 0, TRUE, DUPLICATE_SAME_ACCESS); + TlsSetValue(globalkey, tsd); + _beginthread(xmlGlobalStateCleanupHelper, 0, p); + + return (tsd); + } + return (globalval); +#endif /* HAVE_COMPILER_TLS */ +#else + return(NULL); +#endif +} + +/************************************************************************ + * * + * Library wide thread interfaces * + * * + ************************************************************************/ + +/** + * xmlGetThreadId: + * + * xmlGetThreadId() find the current thread ID number + * + * Returns the current thread ID number + */ +int +xmlGetThreadId(void) +{ +#ifdef HAVE_PTHREAD_H + return((int) pthread_self()); +#elif defined HAVE_WIN32_THREADS + return GetCurrentThreadId(); +#else + return((int) 0); +#endif +} + +/** + * xmlIsMainThread: + * + * xmlIsMainThread() check whether the current thread is the main thread. 
+ * + * Returns 1 if the current thread is the main thread, 0 otherwise + */ +int +xmlIsMainThread(void) +{ +#ifdef HAVE_PTHREAD_H + pthread_once(&once_control, xmlOnceInit); +#elif defined HAVE_WIN32_THREADS + if (run_once_init) { + run_once_init = 0; + xmlOnceInit (); + } +#endif + +#ifdef DEBUG_THREADS + xmlGenericError(xmlGenericErrorContext, "xmlIsMainThread()\n"); +#endif +#ifdef HAVE_PTHREAD_H + return(mainthread == pthread_self()); +#elif defined HAVE_WIN32_THREADS + return(mainthread == GetCurrentThreadId ()); +#else + return(1); +#endif +} + +/** + * xmlLockLibrary: + * + * xmlLockLibrary() is used to take out a re-entrant lock on the libxml2 + * library. + */ +void +xmlLockLibrary(void) +{ +#ifdef DEBUG_THREADS + xmlGenericError(xmlGenericErrorContext, "xmlLockLibrary()\n"); +#endif + xmlRMutexLock(xmlLibraryLock); +} + +/** + * xmlUnlockLibrary: + * + * xmlUnlockLibrary() is used to release a re-entrant lock on the libxml2 + * library. + */ +void +xmlUnlockLibrary(void) +{ +#ifdef DEBUG_THREADS + xmlGenericError(xmlGenericErrorContext, "xmlUnlockLibrary()\n"); +#endif + xmlRMutexUnlock(xmlLibraryLock); +} + +/** + * xmlInitThreads: + * + * xmlInitThreads() is used to to initialize all the thread related + * data of the libxml2 library. + */ +void +xmlInitThreads(void) +{ +#ifdef DEBUG_THREADS + xmlGenericError(xmlGenericErrorContext, "xmlInitThreads()\n"); +#endif +} + +/** + * xmlCleanupThreads: + * + * xmlCleanupThreads() is used to to cleanup all the thread related + * data of the libxml2 library once processing has ended. + */ +void +xmlCleanupThreads(void) +{ +#ifdef DEBUG_THREADS + xmlGenericError(xmlGenericErrorContext, "xmlCleanupThreads()\n"); +#endif +} + +/** + * xmlOnceInit + * + * xmlOnceInit() is used to initialize the value of mainthread for use + * in other routines. This function should only be called using + * pthread_once() in association with the once_control variable to ensure + * that the function is only called once. 
See man pthread_once for more + * details. + */ +static void +xmlOnceInit(void) { +#ifdef HAVE_PTHREAD_H + (void) pthread_key_create(&globalkey, xmlFreeGlobalState); + mainthread = pthread_self(); +#endif + +#if defined(HAVE_WIN32_THREADS) +#if !defined(HAVE_COMPILER_TLS) + globalkey = TlsAlloc(); +#endif + mainthread = GetCurrentThreadId(); +#endif +} diff --git a/bundle/libxml/tree.c b/bundle/libxml/tree.c new file mode 100644 index 0000000000..c3d304501f --- /dev/null +++ b/bundle/libxml/tree.c @@ -0,0 +1,7566 @@ +/* + * tree.c : implementation of access function for an XML tree. + * + * References: + * XHTML 1.0 W3C REC: http://www.w3.org/TR/2002/REC-xhtml1-20020801/ + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + * + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> /* for memset() only ! */ + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/uri.h> +#include <libxml/entities.h> +#include <libxml/valid.h> +#include <libxml/xmlerror.h> +#include <libxml/parserInternals.h> +#include <libxml/globals.h> +#ifdef LIBXML_HTML_ENABLED +#include <libxml/HTMLtree.h> +#endif + +xmlNsPtr xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns); + +/************************************************************************ + * * + * A few static variables and macros * + * * + ************************************************************************/ +/* #undef xmlStringText */ +const xmlChar xmlStringText[] = { 't', 'e', 'x', 't', 0 }; +/* #undef xmlStringTextNoenc */ +const xmlChar xmlStringTextNoenc[] = + { 't', 'e', 'x', 't', 'n', 'o', 'e', 'n', 'c', 0 }; +/* #undef xmlStringComment */ +const xmlChar xmlStringComment[] = { 'c', 'o', 'm', 'm', 'e', 'n', 't', 0 }; + +static int xmlCompressMode = 0; +static int 
xmlCheckDTD = 1; + +#define UPDATE_LAST_CHILD_AND_PARENT(n) if ((n) != NULL) { \ + xmlNodePtr ulccur = (n)->children; \ + if (ulccur == NULL) { \ + (n)->last = NULL; \ + } else { \ + while (ulccur->next != NULL) { \ + ulccur->parent = (n); \ + ulccur = ulccur->next; \ + } \ + ulccur->parent = (n); \ + (n)->last = ulccur; \ +}} + +/* #define DEBUG_BUFFER */ +/* #define DEBUG_TREE */ + +/************************************************************************ + * * + * Functions to move to entities.c once the * + * API freeze is smoothen and they can be made public. * + * * + ************************************************************************/ +#include <libxml/hash.h> + +/** + * xmlGetEntityFromDtd: + * @dtd: A pointer to the DTD to search + * @name: The entity name + * + * Do an entity lookup in the DTD entity hash table and + * return the corresponding entity, if found. + * + * Returns A pointer to the entity structure or NULL if not found. + */ +static xmlEntityPtr +xmlGetEntityFromDtd(xmlDtdPtr dtd, const xmlChar *name) { + xmlEntitiesTablePtr table; + + if((dtd != NULL) && (dtd->entities != NULL)) { + table = (xmlEntitiesTablePtr) dtd->entities; + return((xmlEntityPtr) xmlHashLookup(table, name)); + /* return(xmlGetEntityFromTable(table, name)); */ + } + return(NULL); +} +/** + * xmlGetParameterEntityFromDtd: + * @dtd: A pointer to the DTD to search + * @name: The entity name + * + * Do an entity lookup in the DTD pararmeter entity hash table and + * return the corresponding entity, if found. + * + * Returns A pointer to the entity structure or NULL if not found. 
+ */ +static xmlEntityPtr +xmlGetParameterEntityFromDtd(xmlDtdPtr dtd, const xmlChar *name) { + xmlEntitiesTablePtr table; + + if ((dtd != NULL) && (dtd->pentities != NULL)) { + table = (xmlEntitiesTablePtr) dtd->pentities; + return((xmlEntityPtr) xmlHashLookup(table, name)); + /* return(xmlGetEntityFromTable(table, name)); */ + } + return(NULL); +} + +/************************************************************************ + * * + * Allocation and deallocation of basic structures * + * * + ************************************************************************/ + +/** + * xmlSetBufferAllocationScheme: + * @scheme: allocation method to use + * + * Set the buffer allocation method. Types are + * XML_BUFFER_ALLOC_EXACT - use exact sizes, keeps memory usage down + * XML_BUFFER_ALLOC_DOUBLEIT - double buffer when extra needed, + * improves performance + */ +void +xmlSetBufferAllocationScheme(xmlBufferAllocationScheme scheme) { + xmlBufferAllocScheme = scheme; +} + +/** + * xmlGetBufferAllocationScheme: + * + * Types are + * XML_BUFFER_ALLOC_EXACT - use exact sizes, keeps memory usage down + * XML_BUFFER_ALLOC_DOUBLEIT - double buffer when extra needed, + * improves performance + * + * Returns the current allocation scheme + */ +xmlBufferAllocationScheme +xmlGetBufferAllocationScheme(void) { + return(xmlBufferAllocScheme); +} + +/** + * xmlNewNs: + * @node: the element carrying the namespace + * @href: the URI associated + * @prefix: the prefix for the namespace + * + * Creation of a new Namespace. This function will refuse to create + * a namespace with a similar prefix than an existing one present on this + * node. + * We use href==NULL in the case of an element creation where the namespace + * was not defined. 
+ * Returns a new namespace pointer or NULL + */ +xmlNsPtr +xmlNewNs(xmlNodePtr node, const xmlChar *href, const xmlChar *prefix) { + xmlNsPtr cur; + + if ((node != NULL) && (node->type != XML_ELEMENT_NODE)) + return(NULL); + + if ((prefix != NULL) && (xmlStrEqual(prefix, BAD_CAST "xml"))) + return(NULL); + + /* + * Allocate a new Namespace and fill the fields. + */ + cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewNs : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNs)); + cur->type = XML_LOCAL_NAMESPACE; + + if (href != NULL) + cur->href = xmlStrdup(href); + if (prefix != NULL) + cur->prefix = xmlStrdup(prefix); + + /* + * Add it at the end to preserve parsing order ... + * and checks for existing use of the prefix + */ + if (node != NULL) { + if (node->nsDef == NULL) { + node->nsDef = cur; + } else { + xmlNsPtr prev = node->nsDef; + + if (((prev->prefix == NULL) && (cur->prefix == NULL)) || + (xmlStrEqual(prev->prefix, cur->prefix))) { + xmlFreeNs(cur); + return(NULL); + } + while (prev->next != NULL) { + prev = prev->next; + if (((prev->prefix == NULL) && (cur->prefix == NULL)) || + (xmlStrEqual(prev->prefix, cur->prefix))) { + xmlFreeNs(cur); + return(NULL); + } + } + prev->next = cur; + } + } + return(cur); +} + +/** + * xmlSetNs: + * @node: a node in the document + * @ns: a namespace pointer + * + * Associate a namespace to a node, a posteriori. 
+ */ +void +xmlSetNs(xmlNodePtr node, xmlNsPtr ns) { + if (node == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlSetNs: node == NULL\n"); +#endif + return; + } + node->ns = ns; +} + +/** + * xmlFreeNs: + * @cur: the namespace pointer + * + * Free up the structures associated to a namespace + */ +void +xmlFreeNs(xmlNsPtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeNs : ns == NULL\n"); +#endif + return; + } + if (cur->href != NULL) xmlFree((char *) cur->href); + if (cur->prefix != NULL) xmlFree((char *) cur->prefix); + xmlFree(cur); +} + +/** + * xmlFreeNsList: + * @cur: the first namespace pointer + * + * Free up all the structures associated to the chained namespaces. + */ +void +xmlFreeNsList(xmlNsPtr cur) { + xmlNsPtr next; + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeNsList : ns == NULL\n"); +#endif + return; + } + while (cur != NULL) { + next = cur->next; + xmlFreeNs(cur); + cur = next; + } +} + +/** + * xmlNewDtd: + * @doc: the document pointer + * @name: the DTD name + * @ExternalID: the external ID + * @SystemID: the system ID + * + * Creation of a new DTD for the external subset. To create an + * internal subset, use xmlCreateIntSubset(). + * + * Returns a pointer to the new DTD structure + */ +xmlDtdPtr +xmlNewDtd(xmlDocPtr doc, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) { + xmlDtdPtr cur; + + if ((doc != NULL) && (doc->extSubset != NULL)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewDtd(%s): document %s already have a DTD %s\n", + /* !!! */ (char *) name, doc->name, + /* !!! */ (char *)doc->extSubset->name); +#endif + return(NULL); + } + + /* + * Allocate a new DTD and fill the fields. 
+ */ + cur = (xmlDtdPtr) xmlMalloc(sizeof(xmlDtd)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewDtd : malloc failed\n"); + return(NULL); + } + memset(cur, 0 , sizeof(xmlDtd)); + cur->type = XML_DTD_NODE; + + if (name != NULL) + cur->name = xmlStrdup(name); + if (ExternalID != NULL) + cur->ExternalID = xmlStrdup(ExternalID); + if (SystemID != NULL) + cur->SystemID = xmlStrdup(SystemID); + if (doc != NULL) + doc->extSubset = cur; + cur->doc = doc; + + return(cur); +} + +/** + * xmlGetIntSubset: + * @doc: the document pointer + * + * Get the internal subset of a document + * Returns a pointer to the DTD structure or NULL if not found + */ + +xmlDtdPtr +xmlGetIntSubset(xmlDocPtr doc) { + xmlNodePtr cur; + + if (doc == NULL) + return(NULL); + cur = doc->children; + while (cur != NULL) { + if (cur->type == XML_DTD_NODE) + return((xmlDtdPtr) cur); + cur = cur->next; + } + return((xmlDtdPtr) doc->intSubset); +} + +/** + * xmlCreateIntSubset: + * @doc: the document pointer + * @name: the DTD name + * @ExternalID: the external (PUBLIC) ID + * @SystemID: the system ID + * + * Create the internal subset of a document + * Returns a pointer to the new DTD structure + */ +xmlDtdPtr +xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) { + xmlDtdPtr cur; + + if ((doc != NULL) && (xmlGetIntSubset(doc) != NULL)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + + "xmlCreateIntSubset(): document %s already have an internal subset\n", + doc->name); +#endif + return(NULL); + } + + /* + * Allocate a new DTD and fill the fields. 
+ */ + cur = (xmlDtdPtr) xmlMalloc(sizeof(xmlDtd)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCreateIntSubset : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlDtd)); + cur->type = XML_DTD_NODE; + + if (name != NULL) + cur->name = xmlStrdup(name); + if (ExternalID != NULL) + cur->ExternalID = xmlStrdup(ExternalID); + if (SystemID != NULL) + cur->SystemID = xmlStrdup(SystemID); + if (doc != NULL) { + doc->intSubset = cur; + cur->parent = doc; + cur->doc = doc; + if (doc->children == NULL) { + doc->children = (xmlNodePtr) cur; + doc->last = (xmlNodePtr) cur; + } else { + if (doc->type == XML_HTML_DOCUMENT_NODE) { + xmlNodePtr prev; + + prev = doc->children; + prev->prev = (xmlNodePtr) cur; + cur->next = prev; + doc->children = (xmlNodePtr) cur; + } else { + xmlNodePtr next; + + next = doc->children; + while ((next != NULL) && (next->type != XML_ELEMENT_NODE)) + next = next->next; + if (next == NULL) { + cur->prev = doc->last; + cur->prev->next = (xmlNodePtr) cur; + cur->next = NULL; + doc->last = (xmlNodePtr) cur; + } else { + cur->next = next; + cur->prev = next->prev; + if (cur->prev == NULL) + doc->children = (xmlNodePtr) cur; + else + cur->prev->next = (xmlNodePtr) cur; + next->prev = (xmlNodePtr) cur; + } + } + } + } + return(cur); +} + +/** + * xmlFreeDtd: + * @cur: the DTD structure to free up + * + * Free a DTD structure. + */ +void +xmlFreeDtd(xmlDtdPtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeDtd : DTD == NULL\n"); +#endif + return; + } + if (cur->children != NULL) { + xmlNodePtr next, c = cur->children; + + /* + * Cleanup all the DTD comments they are not in the DTD + * indexes. 
+ */ + while (c != NULL) { + next = c->next; + if (c->type == XML_COMMENT_NODE) { + xmlUnlinkNode(c); + xmlFreeNode(c); + } + c = next; + } + } + if (cur->name != NULL) xmlFree((char *) cur->name); + if (cur->SystemID != NULL) xmlFree((char *) cur->SystemID); + if (cur->ExternalID != NULL) xmlFree((char *) cur->ExternalID); + /* TODO !!! */ + if (cur->notations != NULL) + xmlFreeNotationTable((xmlNotationTablePtr) cur->notations); + + if (cur->elements != NULL) + xmlFreeElementTable((xmlElementTablePtr) cur->elements); + if (cur->attributes != NULL) + xmlFreeAttributeTable((xmlAttributeTablePtr) cur->attributes); + if (cur->entities != NULL) + xmlFreeEntitiesTable((xmlEntitiesTablePtr) cur->entities); + if (cur->pentities != NULL) + xmlFreeEntitiesTable((xmlEntitiesTablePtr) cur->pentities); + + xmlFree(cur); +} + +/** + * xmlNewDoc: + * @version: xmlChar string giving the version of XML "1.0" + * + * Creates a new XML document + * + * Returns a new document + */ +xmlDocPtr +xmlNewDoc(const xmlChar *version) { + xmlDocPtr cur; + + if (version == NULL) + version = (const xmlChar *) "1.0"; + + /* + * Allocate a new document and fill the fields. + */ + cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewDoc : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlDoc)); + cur->type = XML_DOCUMENT_NODE; + + cur->version = xmlStrdup(version); + cur->standalone = -1; + cur->compression = -1; /* not initialized */ + cur->doc = cur; + cur->charset = XML_CHAR_ENCODING_UTF8; + return(cur); +} + +/** + * xmlFreeDoc: + * @cur: pointer to the document + * + * Free up all the structures used by a document, tree included. 
+ */ +void +xmlFreeDoc(xmlDocPtr cur) { + xmlDtdPtr extSubset, intSubset; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeDoc : document == NULL\n"); +#endif + return; + } + /* + * Do this before freeing the children list to avoid ID lookups + */ + if (cur->ids != NULL) xmlFreeIDTable((xmlIDTablePtr) cur->ids); + cur->ids = NULL; + if (cur->refs != NULL) xmlFreeRefTable((xmlRefTablePtr) cur->refs); + cur->refs = NULL; + extSubset = cur->extSubset; + intSubset = cur->intSubset; + if (intSubset == extSubset) + extSubset = NULL; + if (extSubset != NULL) { + xmlUnlinkNode((xmlNodePtr) cur->extSubset); + cur->extSubset = NULL; + xmlFreeDtd(extSubset); + } + if (intSubset != NULL) { + xmlUnlinkNode((xmlNodePtr) cur->intSubset); + cur->intSubset = NULL; + xmlFreeDtd(intSubset); + } + + if (cur->children != NULL) xmlFreeNodeList(cur->children); + + if (cur->version != NULL) xmlFree((char *) cur->version); + if (cur->name != NULL) xmlFree((char *) cur->name); + if (cur->encoding != NULL) xmlFree((char *) cur->encoding); + if (cur->oldNs != NULL) xmlFreeNsList(cur->oldNs); + if (cur->URL != NULL) xmlFree((char *) cur->URL); + xmlFree(cur); +} + +/** + * xmlStringLenGetNodeList: + * @doc: the document + * @value: the value of the text + * @len: the length of the string value + * + * Parse the value string and build the node list associated. Should + * produce a flat tree with only TEXTs and ENTITY_REFs. + * Returns a pointer to the first child + */ +xmlNodePtr +xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) { + xmlNodePtr ret = NULL, last = NULL; + xmlNodePtr node; + xmlChar *val; + const xmlChar *cur = value; + const xmlChar *q; + xmlEntityPtr ent; + + if (value == NULL) return(NULL); + + q = cur; + while ((*cur != 0) && (cur - value < len)) { + if (*cur == '&') { + /* + * Save the current text. 
+ */ + if (cur != q) { + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } + } + } + /* + * Read the entity string + */ + cur++; + q = cur; + while ((*cur != 0) && (cur - value < len) && (*cur != ';')) cur++; + if ((*cur == 0) || (cur - value >= len)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlStringLenGetNodeList: unterminated entity %30s\n", q); +#endif + return(ret); + } + if (cur != q) { + /* + * Predefined entities don't generate nodes + */ + val = xmlStrndup(q, cur - q); + ent = xmlGetDocEntity(doc, val); + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (last == NULL) { + node = xmlNewDocText(doc, ent->content); + last = ret = node; + } else + xmlNodeAddContent(last, ent->content); + + } else { + /* + * Create a new REFERENCE_REF node + */ + node = xmlNewReference(doc, val); + if (node == NULL) { + if (val != NULL) xmlFree(val); + return(ret); + } + else if ((ent != NULL) && (ent->children == NULL)) { + xmlNodePtr tmp; + + ent->children = + xmlStringGetNodeList(doc, (const xmlChar*)node->content); + tmp = ent->children; + while (tmp) { + tmp->parent = (xmlNodePtr)ent; + tmp = tmp->next; + } + } + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } + } + xmlFree(val); + } + cur++; + q = cur; + } else + cur++; + } + if (cur != q) { + /* + * Handle the last piece of text. 
+ */ + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } + } + } + return(ret); +} + +/** + * xmlStringGetNodeList: + * @doc: the document + * @value: the value of the attribute + * + * Parse the value string and build the node list associated. Should + * produce a flat tree with only TEXTs and ENTITY_REFs. + * Returns a pointer to the first child + */ +xmlNodePtr +xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) { + xmlNodePtr ret = NULL, last = NULL; + xmlNodePtr node; + xmlChar *val; + const xmlChar *cur = value; + const xmlChar *q; + xmlEntityPtr ent; + + if (value == NULL) return(NULL); + + q = cur; + while (*cur != 0) { + if (cur[0] == '&') { + int charval = 0; + xmlChar tmp; + + /* + * Save the current text. + */ + if (cur != q) { + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } + } + } + q = cur; + if ((cur[1] == '#') && (cur[2] == 'x')) { + cur += 3; + tmp = *cur; + while (tmp != ';') { /* Non input consuming loop */ + if ((tmp >= '0') && (tmp <= '9')) + charval = charval * 16 + (tmp - '0'); + else if ((tmp >= 'a') && (tmp <= 'f')) + charval = charval * 16 + (tmp - 'a') + 10; + else if ((tmp >= 'A') && (tmp <= 'F')) + charval = charval * 16 + (tmp - 'A') + 10; + else { + xmlGenericError(xmlGenericErrorContext, + "xmlStringGetNodeList: invalid hexadecimal charvalue\n"); + charval = 0; + break; + } + cur++; + tmp = *cur; + } + if (tmp == ';') + cur++; + q = cur; + } else if (cur[1] == '#') { + cur += 2; + tmp = *cur; + while (tmp != ';') { /* Non input consuming 
loops */ + if ((tmp >= '0') && (tmp <= '9')) + charval = charval * 10 + (tmp - '0'); + else { + xmlGenericError(xmlGenericErrorContext, + "xmlStringGetNodeList: invalid decimal charvalue\n"); + charval = 0; + break; + } + cur++; + tmp = *cur; + } + if (tmp == ';') + cur++; + q = cur; + } else { + /* + * Read the entity string + */ + cur++; + q = cur; + while ((*cur != 0) && (*cur != ';')) cur++; + if (*cur == 0) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlStringGetNodeList: unterminated entity %30s\n", q); +#endif + return(ret); + } + if (cur != q) { + /* + * Predefined entities don't generate nodes + */ + val = xmlStrndup(q, cur - q); + ent = xmlGetDocEntity(doc, val); + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (last == NULL) { + node = xmlNewDocText(doc, ent->content); + last = ret = node; + } else if (last->type != XML_TEXT_NODE) { + node = xmlNewDocText(doc, ent->content); + last = xmlAddNextSibling(last, node); + } else + xmlNodeAddContent(last, ent->content); + + } else { + /* + * Create a new REFERENCE_REF node + */ + node = xmlNewReference(doc, val); + if (node == NULL) { + if (val != NULL) xmlFree(val); + return(ret); + } + else if ((ent != NULL) && (ent->children == NULL)) { + xmlNodePtr temp; + + ent->children = xmlStringGetNodeList(doc, + (const xmlChar*)node->content); + temp = ent->children; + while (temp) { + temp->parent = (xmlNodePtr)ent; + temp = temp->next; + } + } + if (last == NULL) { + last = ret = node; + } else { + last = xmlAddNextSibling(last, node); + } + } + xmlFree(val); + } + cur++; + q = cur; + } + if (charval != 0) { + xmlChar buf[10]; + int len; + + len = xmlCopyCharMultiByte(buf, charval); + buf[len] = 0; + node = xmlNewDocText(doc, buf); + if (node != NULL) { + if (last == NULL) { + last = ret = node; + } else { + last = xmlAddNextSibling(last, node); + } + } + + charval = 0; + } + } else + cur++; + } + if ((cur != q) || (ret == NULL)) { + /* + * Handle the last piece 
of text. + */ + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) { + last = ret = node; + } else { + last = xmlAddNextSibling(last, node); + } + } + } + return(ret); +} + +/** + * xmlNodeListGetString: + * @doc: the document + * @list: a Node list + * @inLine: should we replace entity contents or show their external form + * + * Build the string equivalent to the text contained in the Node list + * made of TEXTs and ENTITY_REFs + * + * Returns a pointer to the string copy, the caller must free it with xmlFree(). + */ +xmlChar * +xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) +{ + xmlNodePtr node = list; + xmlChar *ret = NULL; + xmlEntityPtr ent; + + if (list == NULL) + return (NULL); + + while (node != NULL) { + if ((node->type == XML_TEXT_NODE) || + (node->type == XML_CDATA_SECTION_NODE)) { + if (inLine) { + ret = xmlStrcat(ret, node->content); + } else { + xmlChar *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, node->content); + if (buffer != NULL) { + ret = xmlStrcat(ret, buffer); + xmlFree(buffer); + } + } + } else if (node->type == XML_ENTITY_REF_NODE) { + if (inLine) { + ent = xmlGetDocEntity(doc, node->name); + if (ent != NULL) { + xmlChar *buffer; + + /* an entity content can be any "well balanced chunk", + * i.e. the result of the content [43] production: + * http://www.w3.org/TR/REC-xml#NT-content. + * So it can contain text, CDATA section or nested + * entity reference nodes (among others). 
+ * -> we recursive call xmlNodeListGetString() + * which handles these types */ + buffer = xmlNodeListGetString(doc, ent->children, 1); + if (buffer != NULL) { + ret = xmlStrcat(ret, buffer); + xmlFree(buffer); + } + } else { + ret = xmlStrcat(ret, node->content); + } + } else { + xmlChar buf[2]; + + buf[0] = '&'; + buf[1] = 0; + ret = xmlStrncat(ret, buf, 1); + ret = xmlStrcat(ret, node->name); + buf[0] = ';'; + buf[1] = 0; + ret = xmlStrncat(ret, buf, 1); + } + } +#if 0 + else { + xmlGenericError(xmlGenericErrorContext, + "xmlGetNodeListString : invalid node type %d\n", + node->type); + } +#endif + node = node->next; + } + return (ret); +} +/** + * xmlNodeListGetRawString: + * @doc: the document + * @list: a Node list + * @inLine: should we replace entity contents or show their external form + * + * Builds the string equivalent to the text contained in the Node list + * made of TEXTs and ENTITY_REFs, contrary to xmlNodeListGetString() + * this function doesn't do any character encoding handling. + * + * Returns a pointer to the string copy, the caller must free it with xmlFree(). + */ +xmlChar * +xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) +{ + xmlNodePtr node = list; + xmlChar *ret = NULL; + xmlEntityPtr ent; + + if (list == NULL) + return (NULL); + + while (node != NULL) { + if ((node->type == XML_TEXT_NODE) || + (node->type == XML_CDATA_SECTION_NODE)) { + if (inLine) { + ret = xmlStrcat(ret, node->content); + } else { + xmlChar *buffer; + + buffer = xmlEncodeSpecialChars(doc, node->content); + if (buffer != NULL) { + ret = xmlStrcat(ret, buffer); + xmlFree(buffer); + } + } + } else if (node->type == XML_ENTITY_REF_NODE) { + if (inLine) { + ent = xmlGetDocEntity(doc, node->name); + if (ent != NULL) { + xmlChar *buffer; + + /* an entity content can be any "well balanced chunk", + * i.e. the result of the content [43] production: + * http://www.w3.org/TR/REC-xml#NT-content. 
+ * So it can contain text, CDATA section or nested + * entity reference nodes (among others). + * -> we recursive call xmlNodeListGetRawString() + * which handles these types */ + buffer = + xmlNodeListGetRawString(doc, ent->children, 1); + if (buffer != NULL) { + ret = xmlStrcat(ret, buffer); + xmlFree(buffer); + } + } else { + ret = xmlStrcat(ret, node->content); + } + } else { + xmlChar buf[2]; + + buf[0] = '&'; + buf[1] = 0; + ret = xmlStrncat(ret, buf, 1); + ret = xmlStrcat(ret, node->name); + buf[0] = ';'; + buf[1] = 0; + ret = xmlStrncat(ret, buf, 1); + } + } +#if 0 + else { + xmlGenericError(xmlGenericErrorContext, + "xmlGetNodeListString : invalid node type %d\n", + node->type); + } +#endif + node = node->next; + } + return (ret); +} + +/** + * xmlNewProp: + * @node: the holding node + * @name: the name of the attribute + * @value: the value of the attribute + * + * Create a new property carried by a node. + * Returns a pointer to the attribute + */ +xmlAttrPtr +xmlNewProp(xmlNodePtr node, const xmlChar *name, const xmlChar *value) { + xmlAttrPtr cur; + xmlDocPtr doc = NULL; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewProp : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new property and fill the fields. 
+ */ + cur = (xmlAttrPtr) xmlMalloc(sizeof(xmlAttr)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewProp : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlAttr)); + cur->type = XML_ATTRIBUTE_NODE; + + cur->parent = node; + if (node != NULL) { + doc = node->doc; + cur->doc = doc; + } + cur->name = xmlStrdup(name); + if (value != NULL) { + xmlChar *buffer; + xmlNodePtr tmp; + + buffer = xmlEncodeEntitiesReentrant(doc, value); + cur->children = xmlStringGetNodeList(doc, buffer); + cur->last = NULL; + tmp = cur->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) cur; + tmp->doc = doc; + if (tmp->next == NULL) + cur->last = tmp; + tmp = tmp->next; + } + xmlFree(buffer); + } + + /* + * Add it at the end to preserve parsing order ... + */ + if (node != NULL) { + if (node->properties == NULL) { + node->properties = cur; + } else { + xmlAttrPtr prev = node->properties; + + while (prev->next != NULL) prev = prev->next; + prev->next = cur; + cur->prev = prev; + } + } + return(cur); +} + +/** + * xmlNewNsProp: + * @node: the holding node + * @ns: the namespace + * @name: the name of the attribute + * @value: the value of the attribute + * + * Create a new property tagged with a namespace and carried by a node. + * Returns a pointer to the attribute + */ +xmlAttrPtr +xmlNewNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name, + const xmlChar *value) { + xmlAttrPtr cur; + xmlDocPtr doc = NULL; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewNsProp : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new property and fill the fields. 
+ */ + cur = (xmlAttrPtr) xmlMalloc(sizeof(xmlAttr)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewNsProp : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlAttr)); + cur->type = XML_ATTRIBUTE_NODE; + + cur->parent = node; + if (node != NULL) { + doc = node->doc; + cur->doc = doc; + } + cur->ns = ns; + cur->name = xmlStrdup(name); + if (value != NULL) { + xmlChar *buffer; + xmlNodePtr tmp; + + buffer = xmlEncodeEntitiesReentrant(doc, value); + cur->children = xmlStringGetNodeList(doc, buffer); + cur->last = NULL; + tmp = cur->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) cur; + if (tmp->next == NULL) + cur->last = tmp; + tmp = tmp->next; + } + xmlFree(buffer); + } + + /* + * Add it at the end to preserve parsing order ... + */ + if (node != NULL) { + if (node->properties == NULL) { + node->properties = cur; + } else { + xmlAttrPtr prev = node->properties; + + while (prev->next != NULL) prev = prev->next; + prev->next = cur; + cur->prev = prev; + } + } + return(cur); +} + +/** + * xmlNewNsPropEatName: + * @node: the holding node + * @ns: the namespace + * @name: the name of the attribute + * @value: the value of the attribute + * + * Create a new property tagged with a namespace and carried by a node. + * Returns a pointer to the attribute + */ +xmlAttrPtr +xmlNewNsPropEatName(xmlNodePtr node, xmlNsPtr ns, xmlChar *name, + const xmlChar *value) { + xmlAttrPtr cur; + xmlDocPtr doc = NULL; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewNsPropEatName : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new property and fill the fields. 
+ */ + cur = (xmlAttrPtr) xmlMalloc(sizeof(xmlAttr)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewNsPropEatName : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlAttr)); + cur->type = XML_ATTRIBUTE_NODE; + + cur->parent = node; + if (node != NULL) { + doc = node->doc; + cur->doc = doc; + } + cur->ns = ns; + cur->name = name; + if (value != NULL) { + xmlChar *buffer; + xmlNodePtr tmp; + + buffer = xmlEncodeEntitiesReentrant(doc, value); + cur->children = xmlStringGetNodeList(doc, buffer); + cur->last = NULL; + tmp = cur->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) cur; + if (tmp->next == NULL) + cur->last = tmp; + tmp = tmp->next; + } + xmlFree(buffer); + } + + /* + * Add it at the end to preserve parsing order ... + */ + if (node != NULL) { + if (node->properties == NULL) { + node->properties = cur; + } else { + xmlAttrPtr prev = node->properties; + + while (prev->next != NULL) prev = prev->next; + prev->next = cur; + cur->prev = prev; + } + } + return(cur); +} + +/** + * xmlNewDocProp: + * @doc: the document + * @name: the name of the attribute + * @value: the value of the attribute + * + * Create a new property carried by a document. + * Returns a pointer to the attribute + */ +xmlAttrPtr +xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value) { + xmlAttrPtr cur; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewDocProp : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new property and fill the fields. 
+ */ + cur = (xmlAttrPtr) xmlMalloc(sizeof(xmlAttr)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewDocProp : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlAttr)); + cur->type = XML_ATTRIBUTE_NODE; + + cur->name = xmlStrdup(name); + cur->doc = doc; + if (value != NULL) { + xmlNodePtr tmp; + + cur->children = xmlStringGetNodeList(doc, value); + cur->last = NULL; + + tmp = cur->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) cur; + if (tmp->next == NULL) + cur->last = tmp; + tmp = tmp->next; + } + } + return(cur); +} + +/** + * xmlFreePropList: + * @cur: the first property in the list + * + * Free a property and all its siblings, all the children are freed too. + */ +void +xmlFreePropList(xmlAttrPtr cur) { + xmlAttrPtr next; + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreePropList : property == NULL\n"); +#endif + return; + } + while (cur != NULL) { + next = cur->next; + xmlFreeProp(cur); + cur = next; + } +} + +/** + * xmlFreeProp: + * @cur: an attribute + * + * Free one attribute, all the content is freed too + */ +void +xmlFreeProp(xmlAttrPtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeProp : property == NULL\n"); +#endif + return; + } + /* Check for ID removal -> leading to invalid references ! */ + if ((cur->parent != NULL) && (cur->parent->doc != NULL) && + ((cur->parent->doc->intSubset != NULL) || + (cur->parent->doc->extSubset != NULL))) { + if (xmlIsID(cur->parent->doc, cur->parent, cur)) + xmlRemoveID(cur->parent->doc, cur); + } + if (cur->name != NULL) xmlFree((char *) cur->name); + if (cur->children != NULL) xmlFreeNodeList(cur->children); + xmlFree(cur); +} + +/** + * xmlRemoveProp: + * @cur: an attribute + * + * Unlink and free one attribute, all the content is freed too + * Note this doesn't work for namespace definition attributes + * + * Returns 0 if success and -1 in case of error. 
+ */ +int +xmlRemoveProp(xmlAttrPtr cur) { + xmlAttrPtr tmp; + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlRemoveProp : cur == NULL\n"); +#endif + return(-1); + } + if (cur->parent == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlRemoveProp : cur->parent == NULL\n"); +#endif + return(-1); + } + tmp = cur->parent->properties; + if (tmp == cur) { + cur->parent->properties = cur->next; + xmlFreeProp(cur); + return(0); + } + while (tmp != NULL) { + if (tmp->next == cur) { + tmp->next = cur->next; + if (tmp->next != NULL) + tmp->next->prev = tmp; + xmlFreeProp(cur); + return(0); + } + tmp = tmp->next; + } +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlRemoveProp : attribute not owned by its node\n"); +#endif + return(-1); +} + +/** + * xmlNewPI: + * @name: the processing instruction name + * @content: the PI content + * + * Creation of a processing instruction element. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewPI(const xmlChar *name, const xmlChar *content) { + xmlNodePtr cur; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewPI : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewPI : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_PI_NODE; + + cur->name = xmlStrdup(name); + if (content != NULL) { + cur->content = xmlStrdup(content); + } + return(cur); +} + +/** + * xmlNewNode: + * @ns: namespace if any + * @name: the node name + * + * Creation of a new node element. @ns is optional (NULL). + * + * Returns a pointer to the new node object. 
+ */ +xmlNodePtr +xmlNewNode(xmlNsPtr ns, const xmlChar *name) { + xmlNodePtr cur; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewNode : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewNode : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_ELEMENT_NODE; + + cur->name = xmlStrdup(name); + cur->ns = ns; + return(cur); +} + +/** + * xmlNewNodeEatName: + * @ns: namespace if any + * @name: the node name + * + * Creation of a new node element. @ns is optional (NULL). + * + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewNodeEatName(xmlNsPtr ns, xmlChar *name) { + xmlNodePtr cur; + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewNode : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewNode : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_ELEMENT_NODE; + + cur->name = name; + cur->ns = ns; + return(cur); +} + +/** + * xmlNewDocNode: + * @doc: the document + * @ns: namespace if any + * @name: the node name + * @content: the XML text content if any + * + * Creation of a new node element within a document. @ns and @content + * are optional (NULL). + * NOTE: @content is supposed to be a piece of XML CDATA, so it allow entities + * references, but XML special chars need to be escaped first by using + * xmlEncodeEntitiesReentrant(). Use xmlNewDocRawNode() if you don't + * need entities support. + * + * Returns a pointer to the new node object. 
+ */ +xmlNodePtr +xmlNewDocNode(xmlDocPtr doc, xmlNsPtr ns, + const xmlChar *name, const xmlChar *content) { + xmlNodePtr cur; + + cur = xmlNewNode(ns, name); + if (cur != NULL) { + cur->doc = doc; + if (content != NULL) { + cur->children = xmlStringGetNodeList(doc, content); + UPDATE_LAST_CHILD_AND_PARENT(cur) + } + } + return(cur); +} + +/** + * xmlNewDocNodeEatName: + * @doc: the document + * @ns: namespace if any + * @name: the node name + * @content: the XML text content if any + * + * Creation of a new node element within a document. @ns and @content + * are optional (NULL). + * NOTE: @content is supposed to be a piece of XML CDATA, so it allow entities + * references, but XML special chars need to be escaped first by using + * xmlEncodeEntitiesReentrant(). Use xmlNewDocRawNode() if you don't + * need entities support. + * + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewDocNodeEatName(xmlDocPtr doc, xmlNsPtr ns, + xmlChar *name, const xmlChar *content) { + xmlNodePtr cur; + + cur = xmlNewNodeEatName(ns, name); + if (cur != NULL) { + cur->doc = doc; + if (content != NULL) { + cur->children = xmlStringGetNodeList(doc, content); + UPDATE_LAST_CHILD_AND_PARENT(cur) + } + } + return(cur); +} + + +/** + * xmlNewDocRawNode: + * @doc: the document + * @ns: namespace if any + * @name: the node name + * @content: the text content if any + * + * Creation of a new node element within a document. @ns and @content + * are optional (NULL). + * + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewDocRawNode(xmlDocPtr doc, xmlNsPtr ns, + const xmlChar *name, const xmlChar *content) { + xmlNodePtr cur; + + cur = xmlNewNode(ns, name); + if (cur != NULL) { + cur->doc = doc; + if (content != NULL) { + cur->children = xmlNewDocText(doc, content); + UPDATE_LAST_CHILD_AND_PARENT(cur) + } + } + return(cur); +} + +/** + * xmlNewDocFragment: + * @doc: the document owning the fragment + * + * Creation of a new Fragment node. 
+ * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewDocFragment(xmlDocPtr doc) { + xmlNodePtr cur; + + /* + * Allocate a new DocumentFragment node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewDocFragment : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_DOCUMENT_FRAG_NODE; + + cur->doc = doc; + return(cur); +} + +/** + * xmlNewText: + * @content: the text content + * + * Creation of a new text node. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewText(const xmlChar *content) { + xmlNodePtr cur; + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewText : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_TEXT_NODE; + + cur->name = xmlStringText; + if (content != NULL) { + cur->content = xmlStrdup(content); + } + return(cur); +} + +/** + * xmlNewTextChild: + * @parent: the parent node + * @ns: a namespace if any + * @name: the name of the child + * @content: the text content of the child if any. + * + * Creation of a new child element, added at the end of @parent children list. + * @ns and @content parameters are optional (NULL). If content is non NULL, + * a child TEXT node will be created containing the string content. + * + * Returns a pointer to the new node object. 
+ */ +xmlNodePtr +xmlNewTextChild(xmlNodePtr parent, xmlNsPtr ns, + const xmlChar *name, const xmlChar *content) { + xmlNodePtr cur, prev; + + if (parent == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewTextChild : parent == NULL\n"); +#endif + return(NULL); + } + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewTextChild : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new node + */ + if (ns == NULL) + cur = xmlNewDocRawNode(parent->doc, parent->ns, name, content); + else + cur = xmlNewDocRawNode(parent->doc, ns, name, content); + if (cur == NULL) return(NULL); + + /* + * add the new element at the end of the children list. + */ + cur->type = XML_ELEMENT_NODE; + cur->parent = parent; + cur->doc = parent->doc; + if (parent->children == NULL) { + parent->children = cur; + parent->last = cur; + } else { + prev = parent->last; + prev->next = cur; + cur->prev = prev; + parent->last = cur; + } + + return(cur); +} + +/** + * xmlNewCharRef: + * @doc: the document + * @name: the char ref string, starting with # or "&# ... ;" + * + * Creation of a new character reference node. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewCharRef(xmlDocPtr doc, const xmlChar *name) { + xmlNodePtr cur; + + /* + * Allocate a new node and fill the fields. 
+ */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharRef : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_ENTITY_REF_NODE; + + cur->doc = doc; + if (name[0] == '&') { + int len; + name++; + len = xmlStrlen(name); + if (name[len - 1] == ';') + cur->name = xmlStrndup(name, len - 1); + else + cur->name = xmlStrndup(name, len); + } else + cur->name = xmlStrdup(name); + return(cur); +} + +/** + * xmlNewReference: + * @doc: the document + * @name: the reference name, or the reference string with & and ; + * + * Creation of a new reference node. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewReference(xmlDocPtr doc, const xmlChar *name) { + xmlNodePtr cur; + xmlEntityPtr ent; + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewReference : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_ENTITY_REF_NODE; + + cur->doc = doc; + if (name[0] == '&') { + int len; + name++; + len = xmlStrlen(name); + if (name[len - 1] == ';') + cur->name = xmlStrndup(name, len - 1); + else + cur->name = xmlStrndup(name, len); + } else + cur->name = xmlStrdup(name); + + ent = xmlGetDocEntity(doc, cur->name); + if (ent != NULL) { + cur->content = ent->content; + /* + * The parent pointer in entity is a DTD pointer and thus is NOT + * updated. Not sure if this is 100% correct. + * -George + */ + cur->children = (xmlNodePtr) ent; + cur->last = (xmlNodePtr) ent; + } + return(cur); +} + +/** + * xmlNewDocText: + * @doc: the document + * @content: the text content + * + * Creation of a new text node within a document. + * Returns a pointer to the new node object. 
+ */ +xmlNodePtr +xmlNewDocText(xmlDocPtr doc, const xmlChar *content) { + xmlNodePtr cur; + + cur = xmlNewText(content); + if (cur != NULL) cur->doc = doc; + return(cur); +} + +/** + * xmlNewTextLen: + * @content: the text content + * @len: the text len. + * + * Creation of a new text node with an extra parameter for the content's length + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewTextLen(const xmlChar *content, int len) { + xmlNodePtr cur; + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewTextLen : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_TEXT_NODE; + + cur->name = xmlStringText; + if (content != NULL) { + cur->content = xmlStrndup(content, len); + } + return(cur); +} + +/** + * xmlNewDocTextLen: + * @doc: the document + * @content: the text content + * @len: the text len. + * + * Creation of a new text node with an extra content length parameter. The + * text node pertain to a given document. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewDocTextLen(xmlDocPtr doc, const xmlChar *content, int len) { + xmlNodePtr cur; + + cur = xmlNewTextLen(content, len); + if (cur != NULL) cur->doc = doc; + return(cur); +} + +/** + * xmlNewComment: + * @content: the comment content + * + * Creation of a new node containing a comment. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewComment(const xmlChar *content) { + xmlNodePtr cur; + + /* + * Allocate a new node and fill the fields. 
+ */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewComment : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_COMMENT_NODE; + + cur->name = xmlStringComment; + if (content != NULL) { + cur->content = xmlStrdup(content); + } + return(cur); +} + +/** + * xmlNewCDataBlock: + * @doc: the document + * @content: the CDATA block content content + * @len: the length of the block + * + * Creation of a new node containing a CDATA block. + * Returns a pointer to the new node object. + */ +xmlNodePtr +xmlNewCDataBlock(xmlDocPtr doc, const xmlChar *content, int len) { + xmlNodePtr cur; + + /* + * Allocate a new node and fill the fields. + */ + cur = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCDataBlock : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNode)); + cur->type = XML_CDATA_SECTION_NODE; + cur->doc = doc; + + if (content != NULL) { + cur->content = xmlStrndup(content, len); + } + return(cur); +} + +/** + * xmlNewDocComment: + * @doc: the document + * @content: the comment content + * + * Creation of a new node containing a comment within a document. + * Returns a pointer to the new node object. 
+ */ +xmlNodePtr +xmlNewDocComment(xmlDocPtr doc, const xmlChar *content) { + xmlNodePtr cur; + + cur = xmlNewComment(content); + if (cur != NULL) cur->doc = doc; + return(cur); +} + +/** + * xmlSetTreeDoc: + * @tree: the top element + * @doc: the document + * + * update all nodes under the tree to point to the right document + */ +void +xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc) { + xmlAttrPtr prop; + + if (tree == NULL) + return; + if (tree->doc != doc) { + if(tree->type == XML_ELEMENT_NODE) { + prop = tree->properties; + while (prop != NULL) { + prop->doc = doc; + xmlSetListDoc(prop->children, doc); + prop = prop->next; + } + } + if (tree->children != NULL) + xmlSetListDoc(tree->children, doc); + tree->doc = doc; + } +} + +/** + * xmlSetListDoc: + * @list: the first element + * @doc: the document + * + * update all nodes in the list to point to the right document + */ +void +xmlSetListDoc(xmlNodePtr list, xmlDocPtr doc) { + xmlNodePtr cur; + + if (list == NULL) + return; + cur = list; + while (cur != NULL) { + if (cur->doc != doc) + xmlSetTreeDoc(cur, doc); + cur = cur->next; + } +} + + +/** + * xmlNewChild: + * @parent: the parent node + * @ns: a namespace if any + * @name: the name of the child + * @content: the XML content of the child if any. + * + * Creation of a new child element, added at the end of @parent children list. + * @ns and @content parameters are optional (NULL). If content is non NULL, + * a child list containing the TEXTs and ENTITY_REFs node will be created. + * NOTE: @content is supposed to be a piece of XML CDATA, so it allow entities + * references, but XML special chars need to be escaped first by using + * xmlEncodeEntitiesReentrant(). Use xmlNewTextChild() if entities + * support is not needed. + * + * Returns a pointer to the new node object. 
+ */ +xmlNodePtr +xmlNewChild(xmlNodePtr parent, xmlNsPtr ns, + const xmlChar *name, const xmlChar *content) { + xmlNodePtr cur, prev; + + if (parent == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewChild : parent == NULL\n"); +#endif + return(NULL); + } + + if (name == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewChild : name == NULL\n"); +#endif + return(NULL); + } + + /* + * Allocate a new node + */ + if (parent->type == XML_ELEMENT_NODE) { + if (ns == NULL) + cur = xmlNewDocNode(parent->doc, parent->ns, name, content); + else + cur = xmlNewDocNode(parent->doc, ns, name, content); + } else if ((parent->type == XML_DOCUMENT_NODE) || + (parent->type == XML_HTML_DOCUMENT_NODE)) { + if (ns == NULL) + cur = xmlNewDocNode((xmlDocPtr) parent, NULL, name, content); + else + cur = xmlNewDocNode((xmlDocPtr) parent, ns, name, content); + } else if (parent->type == XML_DOCUMENT_FRAG_NODE) { + cur = xmlNewDocNode( parent->doc, ns, name, content); + } else { + return(NULL); + } + if (cur == NULL) return(NULL); + + /* + * add the new element at the end of the children list. + */ + cur->type = XML_ELEMENT_NODE; + cur->parent = parent; + cur->doc = parent->doc; + if (parent->children == NULL) { + parent->children = cur; + parent->last = cur; + } else { + prev = parent->last; + prev->next = cur; + cur->prev = prev; + parent->last = cur; + } + + return(cur); +} + +/** + * xmlAddNextSibling: + * @cur: the child node + * @elem: the new node + * + * Add a new node @elem as the next sibling of @cur + * If the new node was already inserted in a document it is + * first unlinked from its existing context. + * As a result of text merging @elem may be freed. + * If the new node is ATTRIBUTE, it is added into properties instead of children. + * If there is an attribute with equal name, it is first destroyed. + * + * Returns the new node or NULL in case of error. 
+ */ +xmlNodePtr +xmlAddNextSibling(xmlNodePtr cur, xmlNodePtr elem) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddNextSibling : cur == NULL\n"); +#endif + return(NULL); + } + if (elem == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddNextSibling : elem == NULL\n"); +#endif + return(NULL); + } + + xmlUnlinkNode(elem); + + if (elem->type == XML_TEXT_NODE) { + if (cur->type == XML_TEXT_NODE) { + xmlNodeAddContent(cur, elem->content); + xmlFreeNode(elem); + return(cur); + } + if ((cur->next != NULL) && (cur->next->type == XML_TEXT_NODE) && + (cur->name == cur->next->name)) { + xmlChar *tmp; + + tmp = xmlStrdup(elem->content); + tmp = xmlStrcat(tmp, cur->next->content); + xmlNodeSetContent(cur->next, tmp); + xmlFree(tmp); + xmlFreeNode(elem); + return(cur->next); + } + } else if (elem->type == XML_ATTRIBUTE_NODE) { + /* check if an attribute with the same name exists */ + xmlAttrPtr attr; + + if (elem->ns == NULL) + attr = xmlHasProp(cur->parent, elem->name); + else + attr = xmlHasNsProp(cur->parent, elem->name, elem->ns->href); + if ((attr != NULL) && (attr != (xmlAttrPtr) elem)) { + /* different instance, destroy it (attributes must be unique) */ + xmlFreeProp(attr); + } + } + + if (elem->doc != cur->doc) { + xmlSetTreeDoc(elem, cur->doc); + } + elem->parent = cur->parent; + elem->prev = cur; + elem->next = cur->next; + cur->next = elem; + if (elem->next != NULL) + elem->next->prev = elem; + if ((elem->parent != NULL) && (elem->parent->last == cur) && (elem->type != XML_ATTRIBUTE_NODE)) + elem->parent->last = elem; + return(elem); +} + +/** + * xmlAddPrevSibling: + * @cur: the child node + * @elem: the new node + * + * Add a new node @elem as the previous sibling of @cur + * merging adjacent TEXT nodes (@elem may be freed) + * If the new node was already inserted in a document it is + * first unlinked from its existing context. 
+ * If the new node is ATTRIBUTE, it is added into properties instead of children. + * If there is an attribute with equal name, it is first destroyed. + * + * Returns the new node or NULL in case of error. + */ +xmlNodePtr +xmlAddPrevSibling(xmlNodePtr cur, xmlNodePtr elem) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddPrevSibling : cur == NULL\n"); +#endif + return(NULL); + } + if (elem == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddPrevSibling : elem == NULL\n"); +#endif + return(NULL); + } + + xmlUnlinkNode(elem); + + if (elem->type == XML_TEXT_NODE) { + if (cur->type == XML_TEXT_NODE) { + xmlChar *tmp; + + tmp = xmlStrdup(elem->content); + tmp = xmlStrcat(tmp, cur->content); + xmlNodeSetContent(cur, tmp); + xmlFree(tmp); + xmlFreeNode(elem); + return(cur); + } + if ((cur->prev != NULL) && (cur->prev->type == XML_TEXT_NODE) && + (cur->name == cur->prev->name)) { + xmlNodeAddContent(cur->prev, elem->content); + xmlFreeNode(elem); + return(cur->prev); + } + } else if (elem->type == XML_ATTRIBUTE_NODE) { + /* check if an attribute with the same name exists */ + xmlAttrPtr attr; + + if (elem->ns == NULL) + attr = xmlHasProp(cur->parent, elem->name); + else + attr = xmlHasNsProp(cur->parent, elem->name, elem->ns->href); + if ((attr != NULL) && (attr != (xmlAttrPtr) elem)) { + /* different instance, destroy it (attributes must be unique) */ + xmlFreeProp(attr); + } + } + + if (elem->doc != cur->doc) { + xmlSetTreeDoc(elem, cur->doc); + } + elem->parent = cur->parent; + elem->next = cur; + elem->prev = cur->prev; + cur->prev = elem; + if (elem->prev != NULL) + elem->prev->next = elem; + if (elem->parent != NULL) { + if (elem->type == XML_ATTRIBUTE_NODE) { + if (elem->parent->properties == (xmlAttrPtr) cur) { + elem->parent->properties = (xmlAttrPtr) elem; + } + } else { + if (elem->parent->children == cur) { + elem->parent->children = elem; + } + } + } + return(elem); +} + +/** + * 
xmlAddSibling: + * @cur: the child node + * @elem: the new node + * + * Add a new element @elem to the list of siblings of @cur + * merging adjacent TEXT nodes (@elem may be freed) + * If the new element was already inserted in a document it is + * first unlinked from its existing context. + * + * Returns the new element or NULL in case of error. + */ +xmlNodePtr +xmlAddSibling(xmlNodePtr cur, xmlNodePtr elem) { + xmlNodePtr parent; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddSibling : cur == NULL\n"); +#endif + return(NULL); + } + + if (elem == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddSibling : elem == NULL\n"); +#endif + return(NULL); + } + + /* + * Constant time is we can rely on the ->parent->last to find + * the last sibling. + */ + if ((cur->parent != NULL) && + (cur->parent->children != NULL) && + (cur->parent->last != NULL) && + (cur->parent->last->next == NULL)) { + cur = cur->parent->last; + } else { + while (cur->next != NULL) cur = cur->next; + } + + xmlUnlinkNode(elem); + + if ((cur->type == XML_TEXT_NODE) && (elem->type == XML_TEXT_NODE)) { + xmlNodeAddContent(cur, elem->content); + xmlFreeNode(elem); + return(cur); + } + + if (elem->doc != cur->doc) { + xmlSetTreeDoc(elem, cur->doc); + } + parent = cur->parent; + elem->prev = cur; + elem->next = NULL; + elem->parent = parent; + cur->next = elem; + if (parent != NULL) + parent->last = elem; + + return(elem); +} + +/** + * xmlAddChildList: + * @parent: the parent node + * @cur: the first node in the list + * + * Add a list of node at the end of the child list of the parent + * merging adjacent TEXT nodes (@cur may be freed) + * + * Returns the last child or NULL in case of error. 
+ */ +xmlNodePtr +xmlAddChildList(xmlNodePtr parent, xmlNodePtr cur) { + xmlNodePtr prev; + + if (parent == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddChildList : parent == NULL\n"); +#endif + return(NULL); + } + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddChildList : child == NULL\n"); +#endif + return(NULL); + } + + if ((cur->doc != NULL) && (parent->doc != NULL) && + (cur->doc != parent->doc)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "Elements moved to a different document\n"); +#endif + } + + /* + * add the first element at the end of the children list. + */ + if (parent->children == NULL) { + parent->children = cur; + } else { + /* + * If cur and parent->last both are TEXT nodes, then merge them. + */ + if ((cur->type == XML_TEXT_NODE) && + (parent->last->type == XML_TEXT_NODE) && + (cur->name == parent->last->name)) { + xmlNodeAddContent(parent->last, cur->content); + /* + * if it's the only child, nothing more to be done. + */ + if (cur->next == NULL) { + xmlFreeNode(cur); + return(parent->last); + } + prev = cur; + cur = cur->next; + xmlFreeNode(prev); + } + prev = parent->last; + prev->next = cur; + cur->prev = prev; + } + while (cur->next != NULL) { + cur->parent = parent; + if (cur->doc != parent->doc) { + xmlSetTreeDoc(cur, parent->doc); + } + cur = cur->next; + } + cur->parent = parent; + cur->doc = parent->doc; /* the parent may not be linked to a doc ! */ + parent->last = cur; + + return(cur); +} + +/** + * xmlAddChild: + * @parent: the parent node + * @cur: the child node + * + * Add a new node to @parent, at the end of the child (or property) list + * merging adjacent TEXT nodes (in which case @cur is freed) + * If the new node was already inserted in a document it is + * first unlinked from its existing context. + * If the new node is ATTRIBUTE, it is added into properties instead of children. 
+ * If there is an attribute with equal name, it is first destroyed. + * + * Returns the child or NULL in case of error. + */ +xmlNodePtr +xmlAddChild(xmlNodePtr parent, xmlNodePtr cur) { + xmlNodePtr prev; + + if (parent == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddChild : parent == NULL\n"); +#endif + return(NULL); + } + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAddChild : child == NULL\n"); +#endif + return(NULL); + } + + /* + * If cur is a TEXT node, merge its content with adjacent TEXT nodes + * cur is then freed. + */ + if (cur->type == XML_TEXT_NODE) { + if ((parent->type == XML_TEXT_NODE) && + (parent->content != NULL)) { + xmlNodeAddContent(parent, cur->content); + xmlFreeNode(cur); + return(parent); + } + if ((parent->last != NULL) && (parent->last->type == XML_TEXT_NODE) && + (parent->last->name == cur->name)) { + xmlNodeAddContent(parent->last, cur->content); + xmlFreeNode(cur); + return(parent->last); + } + } + + /* + * add the new element at the end of the children list. 
+ */ + cur->parent = parent; + if (cur->doc != parent->doc) { + xmlSetTreeDoc(cur, parent->doc); + } + + /* + * Coalescing + */ + if ((parent->type == XML_TEXT_NODE) && + (parent->content != NULL)) { + xmlNodeAddContent(parent, cur->content); + xmlFreeNode(cur); + return(parent); + } + if (cur->type == XML_ATTRIBUTE_NODE) { + if (parent->properties == NULL) { + parent->properties = (xmlAttrPtr) cur; + } else { + /* check if an attribute with the same name exists */ + xmlAttrPtr lastattr; + + if (cur->ns == NULL) + lastattr = xmlHasProp(parent, cur->name); + else + lastattr = xmlHasNsProp(parent, cur->name, cur->ns->href); + if ((lastattr != NULL) && (lastattr != (xmlAttrPtr) cur)) { + /* different instance, destroy it (attributes must be unique) */ + xmlFreeProp(lastattr); + } + /* find the end */ + lastattr = parent->properties; + while (lastattr->next != NULL) { + lastattr = lastattr->next; + } + lastattr->next = (xmlAttrPtr) cur; + ((xmlAttrPtr) cur)->prev = lastattr; + } + } else { + if (parent->children == NULL) { + parent->children = cur; + parent->last = cur; + } else { + prev = parent->last; + prev->next = cur; + cur->prev = prev; + parent->last = cur; + } + } + return(cur); +} + +/** + * xmlGetLastChild: + * @parent: the parent node + * + * Search the last child of a node. + * Returns the last child or NULL if none. + */ +xmlNodePtr +xmlGetLastChild(xmlNodePtr parent) { + if (parent == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlGetLastChild : parent == NULL\n"); +#endif + return(NULL); + } + return(parent->last); +} + +/** + * xmlFreeNodeList: + * @cur: the first node in the list + * + * Free a node and all its siblings, this is a recursive behaviour, all + * the children are freed too. 
+ */ +void +xmlFreeNodeList(xmlNodePtr cur) { + xmlNodePtr next; + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeNodeList : node == NULL\n"); +#endif + return; + } + if (cur->type == XML_NAMESPACE_DECL) { + xmlFreeNsList((xmlNsPtr) cur); + return; + } + while (cur != NULL) { + next = cur->next; + /* unroll to speed up freeing the document */ + if (cur->type != XML_DTD_NODE) { + if ((cur->children != NULL) && + (cur->type != XML_ENTITY_REF_NODE)) + xmlFreeNodeList(cur->children); + if (((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_XINCLUDE_START) || + (cur->type == XML_XINCLUDE_END)) && + (cur->properties != NULL)) + xmlFreePropList(cur->properties); + if ((cur->type != XML_ELEMENT_NODE) && + (cur->type != XML_XINCLUDE_START) && + (cur->type != XML_XINCLUDE_END) && + (cur->type != XML_ENTITY_REF_NODE)) { + if (cur->content != NULL) xmlFree(cur->content); + } + if (((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_XINCLUDE_START) || + (cur->type == XML_XINCLUDE_END)) && + (cur->nsDef != NULL)) + xmlFreeNsList(cur->nsDef); + + /* + * When a node is a text node or a comment, it uses a global static + * variable for the name of the node. + * + * The xmlStrEqual comparisons need to be done when (happened with + * XML::libXML and XML::libXSLT) the library is included twice + * statically in the binary and a tree allocated by one occurrence + * of the lib gets freed by the other occurrence, in this case + * the string addresses compare are not sufficient. 
+ */ + if ((cur->name != NULL) && + (cur->name != xmlStringText) && + (cur->name != xmlStringTextNoenc) && + (cur->name != xmlStringComment)) { + if (cur->type == XML_TEXT_NODE) { + if ((!xmlStrEqual(cur->name, xmlStringText)) && + (!xmlStrEqual(cur->name, xmlStringTextNoenc))) + xmlFree((char *) cur->name); + } else if (cur->type == XML_COMMENT_NODE) { + if (!xmlStrEqual(cur->name, xmlStringComment)) + xmlFree((char *) cur->name); + } else + xmlFree((char *) cur->name); + } + /* TODO : derecursivate this function */ + xmlFree(cur); + } + cur = next; + } +} + +/** + * xmlFreeNode: + * @cur: the node + * + * Free a node, this is a recursive behaviour, all the children are freed too. + * This doesn't unlink the child from the list, use xmlUnlinkNode() first. + */ +void +xmlFreeNode(xmlNodePtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlFreeNode : node == NULL\n"); +#endif + return; + } + /* use xmlFreeDtd for DTD nodes */ + if (cur->type == XML_DTD_NODE) { + xmlFreeDtd((xmlDtdPtr) cur); + return; + } + if (cur->type == XML_NAMESPACE_DECL) { + xmlFreeNs((xmlNsPtr) cur); + return; + } + if (cur->type == XML_ATTRIBUTE_NODE) { + xmlFreeProp((xmlAttrPtr) cur); + return; + } + if ((cur->children != NULL) && + (cur->type != XML_ENTITY_REF_NODE)) + xmlFreeNodeList(cur->children); + if (((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_XINCLUDE_START) || + (cur->type == XML_XINCLUDE_END)) && + (cur->properties != NULL)) + xmlFreePropList(cur->properties); + if ((cur->type != XML_ELEMENT_NODE) && + (cur->content != NULL) && + (cur->type != XML_ENTITY_REF_NODE) && + (cur->type != XML_XINCLUDE_END) && + (cur->type != XML_XINCLUDE_START)) { + xmlFree(cur->content); + } + + /* + * When a node is a text node or a comment, it uses a global static + * variable for the name of the node. 
+ * + * The xmlStrEqual comparisons need to be done when (happened with + * XML::libXML and XML::libXSLT) the library is included twice statically + * in the binary and a tree allocated by one occurence of the lib gets + * freed by the other occurrence, in this case the string addresses compare + * are not sufficient. + */ + if ((cur->name != NULL) && + (cur->name != xmlStringText) && + (cur->name != xmlStringTextNoenc) && + (cur->name != xmlStringComment)) { + if (cur->type == XML_TEXT_NODE) { + if ((!xmlStrEqual(cur->name, xmlStringText)) && + (!xmlStrEqual(cur->name, xmlStringTextNoenc))) + xmlFree((char *) cur->name); + } else if (cur->type == XML_COMMENT_NODE) { + if (!xmlStrEqual(cur->name, xmlStringComment)) + xmlFree((char *) cur->name); + } else + xmlFree((char *) cur->name); + } + + if (((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_XINCLUDE_START) || + (cur->type == XML_XINCLUDE_END)) && + (cur->nsDef != NULL)) + xmlFreeNsList(cur->nsDef); + xmlFree(cur); +} + +/** + * xmlUnlinkNode: + * @cur: the node + * + * Unlink a node from it's current context, the node is not freed + */ +void +xmlUnlinkNode(xmlNodePtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlUnlinkNode : node == NULL\n"); +#endif + return; + } + if (cur->type == XML_DTD_NODE) { + xmlDocPtr doc; + doc = cur->doc; + if (doc->intSubset == (xmlDtdPtr) cur) + doc->intSubset = NULL; + if (doc->extSubset == (xmlDtdPtr) cur) + doc->extSubset = NULL; + } + if (cur->parent != NULL) { + xmlNodePtr parent; + parent = cur->parent; + if (cur->type == XML_ATTRIBUTE_NODE) { + if (parent->properties == (xmlAttrPtr) cur) + parent->properties = ((xmlAttrPtr) cur)->next; + } else { + if (parent->children == cur) + parent->children = cur->next; + if (parent->last == cur) + parent->last = cur->prev; + } + cur->parent = NULL; + } + if (cur->next != NULL) + cur->next->prev = cur->prev; + if (cur->prev != NULL) + cur->prev->next = cur->next; + cur->next = 
cur->prev = NULL; +} + +/** + * xmlReplaceNode: + * @old: the old node + * @cur: the node + * + * Unlink the old node from it's current context, prune the new one + * at the same place. If @cur was already inserted in a document it is + * first unlinked from its existing context. + * + * Returns the @old node + */ +xmlNodePtr +xmlReplaceNode(xmlNodePtr old, xmlNodePtr cur) { + if (old == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlReplaceNode : old == NULL\n"); +#endif + return(NULL); + } + if (cur == NULL) { + xmlUnlinkNode(old); + return(old); + } + if (cur == old) { + return(old); + } + if ((old->type==XML_ATTRIBUTE_NODE) && (cur->type!=XML_ATTRIBUTE_NODE)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlReplaceNode : Trying to replace attribute node with other node type\n"); +#endif + return(old); + } + if ((cur->type==XML_ATTRIBUTE_NODE) && (old->type!=XML_ATTRIBUTE_NODE)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlReplaceNode : Trying to replace a non-attribute node with attribute node\n"); +#endif + return(old); + } + if ((old->type==XML_ATTRIBUTE_NODE) && (cur->type!=XML_ATTRIBUTE_NODE)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlReplaceNode : Trying to replace attribute node with other node type\n"); +#endif + return(old); + } + if ((cur->type==XML_ATTRIBUTE_NODE) && (old->type!=XML_ATTRIBUTE_NODE)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlReplaceNode : Trying to replace a non-attribute node with attribute node\n"); +#endif + return(old); + } + xmlUnlinkNode(cur); + cur->doc = old->doc; + cur->parent = old->parent; + cur->next = old->next; + if (cur->next != NULL) + cur->next->prev = cur; + cur->prev = old->prev; + if (cur->prev != NULL) + cur->prev->next = cur; + if (cur->parent != NULL) { + if (cur->type == XML_ATTRIBUTE_NODE) { + if (cur->parent->properties == (xmlAttrPtr)old) + cur->parent->properties = ((xmlAttrPtr) cur); 
+ } else { + if (cur->parent->children == old) + cur->parent->children = cur; + if (cur->parent->last == old) + cur->parent->last = cur; + } + } + old->next = old->prev = NULL; + old->parent = NULL; + return(old); +} + +/************************************************************************ + * * + * Copy operations * + * * + ************************************************************************/ + +/** + * xmlCopyNamespace: + * @cur: the namespace + * + * Do a copy of the namespace. + * + * Returns: a new #xmlNsPtr, or NULL in case of error. + */ +xmlNsPtr +xmlCopyNamespace(xmlNsPtr cur) { + xmlNsPtr ret; + + if (cur == NULL) return(NULL); + switch (cur->type) { + case XML_LOCAL_NAMESPACE: + ret = xmlNewNs(NULL, cur->href, cur->prefix); + break; + default: +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlCopyNamespace: invalid type %d\n", cur->type); +#endif + return(NULL); + } + return(ret); +} + +/** + * xmlCopyNamespaceList: + * @cur: the first namespace + * + * Do a copy of an namespace list. + * + * Returns: a new #xmlNsPtr, or NULL in case of error. + */ +xmlNsPtr +xmlCopyNamespaceList(xmlNsPtr cur) { + xmlNsPtr ret = NULL; + xmlNsPtr p = NULL,q; + + while (cur != NULL) { + q = xmlCopyNamespace(cur); + if (p == NULL) { + ret = p = q; + } else { + p->next = q; + p = q; + } + cur = cur->next; + } + return(ret); +} + +static xmlNodePtr +xmlStaticCopyNodeList(xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent); +/** + * xmlCopyProp: + * @target: the element where the attribute will be grafted + * @cur: the attribute + * + * Do a copy of the attribute. + * + * Returns: a new #xmlAttrPtr, or NULL in case of error. 
+ */ +xmlAttrPtr +xmlCopyProp(xmlNodePtr target, xmlAttrPtr cur) { + xmlAttrPtr ret; + + if (cur == NULL) return(NULL); + if (target != NULL) + ret = xmlNewDocProp(target->doc, cur->name, NULL); + else if (cur->parent != NULL) + ret = xmlNewDocProp(cur->parent->doc, cur->name, NULL); + else if (cur->children != NULL) + ret = xmlNewDocProp(cur->children->doc, cur->name, NULL); + else + ret = xmlNewDocProp(NULL, cur->name, NULL); + if (ret == NULL) return(NULL); + ret->parent = target; + + if ((cur->ns != NULL) && (target != NULL)) { + xmlNsPtr ns; +/* + * if (target->doc) + * ns = xmlSearchNs(target->doc, target, cur->ns->prefix); + * else if (cur->doc) / * target may not yet have a doc : KPI * / + * ns = xmlSearchNs(cur->doc, target, cur->ns->prefix); + * else + * ns = NULL; + * ret->ns = ns; + */ + ns = xmlSearchNs(target->doc, target, cur->ns->prefix); + if (ns == NULL) { + /* + * Humm, we are copying an element whose namespace is defined + * out of the new tree scope. Search it in the original tree + * and add it at the top of the new tree + */ + ns = xmlSearchNs(cur->doc, cur->parent, cur->ns->prefix); + if (ns != NULL) { + xmlNodePtr root = target; + xmlNodePtr pred = NULL; + + while (root->parent != NULL) { + pred = root; + root = root->parent; + } + if (root == (xmlNodePtr) target->doc) { + /* correct possibly cycling above the document elt */ + root = pred; + } + ret->ns = xmlNewNs(root, ns->href, ns->prefix); + } + } else { + /* + * we have to find something appropriate here since + * we cant be sure, that the namespce we found is identified + * by the prefix + */ + if (xmlStrEqual(ns->href, cur->ns->href)) { + /* this is the nice case */ + ret->ns = ns; + } else { + /* + * we are in trouble: we need a new reconcilied namespace. 
+ * This is expensive + */ + ret->ns = xmlNewReconciliedNs(target->doc, target, cur->ns); + } + } + + } else + ret->ns = NULL; + + if (cur->children != NULL) { + xmlNodePtr tmp; + + ret->children = xmlStaticCopyNodeList(cur->children, ret->doc, (xmlNodePtr) ret); + ret->last = NULL; + tmp = ret->children; + while (tmp != NULL) { + /* tmp->parent = (xmlNodePtr)ret; */ + if (tmp->next == NULL) + ret->last = tmp; + tmp = tmp->next; + } + } + /* + * Try to handle IDs + */ + if ((target!= NULL) && (cur!= NULL) && + (target->doc != NULL) && (cur->doc != NULL) && + (cur->doc->ids != NULL) && (cur->parent != NULL)) { + if (xmlIsID(cur->doc, cur->parent, cur)) { + xmlChar *id; + + id = xmlNodeListGetString(cur->doc, cur->children, 1); + if (id != NULL) { + xmlAddID(NULL, target->doc, id, ret); + xmlFree(id); + } + } + } + return(ret); +} + +/** + * xmlCopyPropList: + * @target: the element where the attributes will be grafted + * @cur: the first attribute + * + * Do a copy of an attribute list. + * + * Returns: a new #xmlAttrPtr, or NULL in case of error. + */ +xmlAttrPtr +xmlCopyPropList(xmlNodePtr target, xmlAttrPtr cur) { + xmlAttrPtr ret = NULL; + xmlAttrPtr p = NULL,q; + + while (cur != NULL) { + q = xmlCopyProp(target, cur); + if (p == NULL) { + ret = p = q; + } else { + p->next = q; + q->prev = p; + p = q; + } + cur = cur->next; + } + return(ret); +} + +/* + * NOTE about the CopyNode operations ! + * + * They are split into external and internal parts for one + * tricky reason: namespaces. Doing a direct copy of a node + * say RPM:Copyright without changing the namespace pointer to + * something else can produce stale links. One way to do it is + * to keep a reference counter but this doesn't work as soon + * as one move the element or the subtree out of the scope of + * the existing namespace. The actual solution seems to add + * a copy of the namespace at the top of the copied tree if + * not available in the subtree. 
+ * Hence two functions, the public front-end call the inner ones + */ + +static xmlNodePtr +xmlStaticCopyNode(const xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent, + int recursive) { + xmlNodePtr ret; + + if (node == NULL) return(NULL); + switch (node->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ELEMENT_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + case XML_ATTRIBUTE_NODE: + return((xmlNodePtr) xmlCopyProp(parent, (xmlAttrPtr) node)); + case XML_NAMESPACE_DECL: + return((xmlNodePtr) xmlCopyNamespaceList((xmlNsPtr) node)); + + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return((xmlNodePtr) xmlCopyDoc((xmlDocPtr) node, recursive)); + case XML_DOCUMENT_TYPE_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + return(NULL); + } + + /* + * Allocate a new node and fill the fields. 
+ */ + ret = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlStaticCopyNode : malloc failed\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlNode)); + ret->type = node->type; + + ret->doc = doc; + ret->parent = parent; + if (node->name == xmlStringText) + ret->name = xmlStringText; + else if (node->name == xmlStringTextNoenc) + ret->name = xmlStringTextNoenc; + else if (node->name == xmlStringComment) + ret->name = xmlStringComment; + else if (node->name != NULL) + ret->name = xmlStrdup(node->name); + if ((node->type != XML_ELEMENT_NODE) && + (node->content != NULL) && + (node->type != XML_ENTITY_REF_NODE) && + (node->type != XML_XINCLUDE_END) && + (node->type != XML_XINCLUDE_START)) { + ret->content = xmlStrdup(node->content); + }else{ + if (node->type == XML_ELEMENT_NODE) + ret->content = (void*)(long) node->content; + } + if (parent != NULL) { + xmlNodePtr tmp; + + tmp = xmlAddChild(parent, ret); + /* node could have coalesced */ + if (tmp != ret) + return(tmp); + } + + if (!recursive) return(ret); + if (node->nsDef != NULL) + ret->nsDef = xmlCopyNamespaceList(node->nsDef); + + if (node->ns != NULL) { + xmlNsPtr ns; + + ns = xmlSearchNs(doc, ret, node->ns->prefix); + if (ns == NULL) { + /* + * Humm, we are copying an element whose namespace is defined + * out of the new tree scope. Search it in the original tree + * and add it at the top of the new tree + */ + ns = xmlSearchNs(node->doc, node, node->ns->prefix); + if (ns != NULL) { + xmlNodePtr root = ret; + + while (root->parent != NULL) root = root->parent; + ret->ns = xmlNewNs(root, ns->href, ns->prefix); + } + } else { + /* + * reference the existing namespace definition in our own tree. 
+ */ + ret->ns = ns; + } + } + if (node->properties != NULL) + ret->properties = xmlCopyPropList(ret, node->properties); + if (node->type == XML_ENTITY_REF_NODE) { + if ((doc == NULL) || (node->doc != doc)) { + /* + * The copied node will go into a separate document, so + * to avoid dangling references to the ENTITY_DECL node + * we cannot keep the reference. Try to find it in the + * target document. + */ + ret->children = (xmlNodePtr) xmlGetDocEntity(doc, ret->name); + } else { + ret->children = node->children; + } + ret->last = ret->children; + } else if (node->children != NULL) { + ret->children = xmlStaticCopyNodeList(node->children, doc, ret); + UPDATE_LAST_CHILD_AND_PARENT(ret) + } + return(ret); +} + +static xmlNodePtr +xmlStaticCopyNodeList(xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent) { + xmlNodePtr ret = NULL; + xmlNodePtr p = NULL,q; + + while (node != NULL) { + if (node->type == XML_DTD_NODE ) { + if (doc == NULL) { + node = node->next; + continue; + } + if (doc->intSubset == NULL) { + q = (xmlNodePtr) xmlCopyDtd( (xmlDtdPtr) node ); + q->doc = doc; + q->parent = parent; + doc->intSubset = (xmlDtdPtr) q; + xmlAddChild(parent, q); + } else { + q = (xmlNodePtr) doc->intSubset; + xmlAddChild(parent, q); + } + } else + q = xmlStaticCopyNode(node, doc, parent, 1); + if (ret == NULL) { + q->prev = NULL; + ret = p = q; + } else if (p != q) { + /* the test is required if xmlStaticCopyNode coalesced 2 text nodes */ + p->next = q; + q->prev = p; + p = q; + } + node = node->next; + } + return(ret); +} + +/** + * xmlCopyNode: + * @node: the node + * @recursive: if 1 do a recursive copy. + * + * Do a copy of the node. + * + * Returns: a new #xmlNodePtr, or NULL in case of error. + */ +xmlNodePtr +xmlCopyNode(const xmlNodePtr node, int recursive) { + xmlNodePtr ret; + + ret = xmlStaticCopyNode(node, NULL, NULL, recursive); + return(ret); +} + +/** + * xmlDocCopyNode: + * @node: the node + * @doc: the document + * @recursive: if 1 do a recursive copy. 
+ * + * Do a copy of the node to a given document. + * + * Returns: a new #xmlNodePtr, or NULL in case of error. + */ +xmlNodePtr +xmlDocCopyNode(const xmlNodePtr node, xmlDocPtr doc, int recursive) { + xmlNodePtr ret; + + ret = xmlStaticCopyNode(node, doc, NULL, recursive); + return(ret); +} + +/** + * xmlCopyNodeList: + * @node: the first node in the list. + * + * Do a recursive copy of the node list. + * + * Returns: a new #xmlNodePtr, or NULL in case of error. + */ +xmlNodePtr xmlCopyNodeList(const xmlNodePtr node) { + xmlNodePtr ret = xmlStaticCopyNodeList(node, NULL, NULL); + return(ret); +} + +/** + * xmlCopyDtd: + * @dtd: the dtd + * + * Do a copy of the dtd. + * + * Returns: a new #xmlDtdPtr, or NULL in case of error. + */ +xmlDtdPtr +xmlCopyDtd(xmlDtdPtr dtd) { + xmlDtdPtr ret; + xmlNodePtr cur, p = NULL, q; + + if (dtd == NULL) return(NULL); + ret = xmlNewDtd(NULL, dtd->name, dtd->ExternalID, dtd->SystemID); + if (ret == NULL) return(NULL); + if (dtd->entities != NULL) + ret->entities = (void *) xmlCopyEntitiesTable( + (xmlEntitiesTablePtr) dtd->entities); + if (dtd->notations != NULL) + ret->notations = (void *) xmlCopyNotationTable( + (xmlNotationTablePtr) dtd->notations); + if (dtd->elements != NULL) + ret->elements = (void *) xmlCopyElementTable( + (xmlElementTablePtr) dtd->elements); + if (dtd->attributes != NULL) + ret->attributes = (void *) xmlCopyAttributeTable( + (xmlAttributeTablePtr) dtd->attributes); + if (dtd->pentities != NULL) + ret->pentities = (void *) xmlCopyEntitiesTable( + (xmlEntitiesTablePtr) dtd->pentities); + + cur = dtd->children; + while (cur != NULL) { + q = NULL; + + if (cur->type == XML_ENTITY_DECL) { + xmlEntityPtr tmp = (xmlEntityPtr) cur; + switch (tmp->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + q = (xmlNodePtr) xmlGetEntityFromDtd(ret, tmp->name); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: 
+ q = (xmlNodePtr) + xmlGetParameterEntityFromDtd(ret, tmp->name); + break; + case XML_INTERNAL_PREDEFINED_ENTITY: + break; + } + } else if (cur->type == XML_ELEMENT_DECL) { + xmlElementPtr tmp = (xmlElementPtr) cur; + q = (xmlNodePtr) + xmlGetDtdQElementDesc(ret, tmp->name, tmp->prefix); + } else if (cur->type == XML_ATTRIBUTE_DECL) { + xmlAttributePtr tmp = (xmlAttributePtr) cur; + q = (xmlNodePtr) + xmlGetDtdQAttrDesc(ret, tmp->elem, tmp->name, tmp->prefix); + } else if (cur->type == XML_COMMENT_NODE) { + q = xmlCopyNode(cur, 0); + } + + if (q == NULL) { + cur = cur->next; + continue; + } + + if (p == NULL) + ret->children = q; + else + p->next = q; + + q->prev = p; + q->parent = (xmlNodePtr) ret; + q->next = NULL; + ret->last = q; + p = q; + cur = cur->next; + } + + return(ret); +} + +/** + * xmlCopyDoc: + * @doc: the document + * @recursive: if 1 do a recursive copy. + * + * Do a copy of the document info. If recursive, the content tree will + * be copied too as well as DTD, namespaces and entities. + * + * Returns: a new #xmlDocPtr, or NULL in case of error. 
+ */ +xmlDocPtr +xmlCopyDoc(xmlDocPtr doc, int recursive) { + xmlDocPtr ret; + + if (doc == NULL) return(NULL); + ret = xmlNewDoc(doc->version); + if (ret == NULL) return(NULL); + if (doc->name != NULL) + ret->name = xmlMemStrdup(doc->name); + if (doc->encoding != NULL) + ret->encoding = xmlStrdup(doc->encoding); + ret->charset = doc->charset; + ret->compression = doc->compression; + ret->standalone = doc->standalone; + if (!recursive) return(ret); + + ret->last = NULL; + ret->children = NULL; + if (doc->intSubset != NULL) { + ret->intSubset = xmlCopyDtd(doc->intSubset); + xmlSetTreeDoc((xmlNodePtr)ret->intSubset, ret); + ret->intSubset->parent = ret; + } + if (doc->oldNs != NULL) + ret->oldNs = xmlCopyNamespaceList(doc->oldNs); + if (doc->children != NULL) { + xmlNodePtr tmp; + + ret->children = xmlStaticCopyNodeList(doc->children, ret, + (xmlNodePtr)ret); + ret->last = NULL; + tmp = ret->children; + while (tmp != NULL) { + if (tmp->next == NULL) + ret->last = tmp; + tmp = tmp->next; + } + } + return(ret); +} + +/************************************************************************ + * * + * Content access functions * + * * + ************************************************************************/ + +/** + * xmlGetLineNo: + * @node: valid node + * + * Get line number of node. 
this requires activation of this option + * before invoking the parser by calling xmlLineNumbersDefault(1) + * + * Returns the line number if successful, -1 otherwise + */ +long +xmlGetLineNo(xmlNodePtr node) +{ + long result = -1; + + if (!node) + return result; + if (node->type == XML_ELEMENT_NODE) + result = (long) node->content; + else if ((node->prev != NULL) && + ((node->prev->type == XML_ELEMENT_NODE) || + (node->prev->type == XML_TEXT_NODE))) + result = xmlGetLineNo(node->prev); + else if ((node->parent != NULL) && + ((node->parent->type == XML_ELEMENT_NODE) || + (node->parent->type == XML_TEXT_NODE))) + result = xmlGetLineNo(node->parent); + + return result; +} + +/** + * xmlGetNodePath: + * @node: a node + * + * Build a structure based Path for the given node + * + * Returns the new path or NULL in case of error. The caller must free + * the returned string + */ +xmlChar * +xmlGetNodePath(xmlNodePtr node) +{ + xmlNodePtr cur, tmp, next; + xmlChar *buffer = NULL, *temp; + size_t buf_len; + xmlChar *buf; + const char *sep; + const char *name; + char nametemp[100]; + int occur = 0; + + if (node == NULL) + return (NULL); + + buf_len = 500; + buffer = (xmlChar *) xmlMalloc(buf_len * sizeof(xmlChar)); + if (buffer == NULL) + return (NULL); + buf = (xmlChar *) xmlMalloc(buf_len * sizeof(xmlChar)); + if (buf == NULL) { + xmlFree(buffer); + return (NULL); + } + + buffer[0] = 0; + cur = node; + do { + name = ""; + sep = "?"; + occur = 0; + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + if (buffer[0] == '/') + break; + sep = "/"; + next = NULL; + } else if (cur->type == XML_ELEMENT_NODE) { + sep = "/"; + name = (const char *) cur->name; + if (cur->ns) { + snprintf(nametemp, sizeof(nametemp) - 1, + "%s:%s", cur->ns->prefix, cur->name); + nametemp[sizeof(nametemp) - 1] = 0; + name = nametemp; + } + next = cur->parent; + + /* + * Thumbler index computation + */ + tmp = cur->prev; + while (tmp != NULL) { + if ((tmp->type == 
XML_ELEMENT_NODE) && + (xmlStrEqual(cur->name, tmp->name))) + occur++; + tmp = tmp->prev; + } + if (occur == 0) { + tmp = cur->next; + while (tmp != NULL && occur == 0) { + if ((tmp->type == XML_ELEMENT_NODE) && + (xmlStrEqual(cur->name, tmp->name))) + occur++; + tmp = tmp->next; + } + if (occur != 0) + occur = 1; + } else + occur++; + } else if (cur->type == XML_COMMENT_NODE) { + sep = "/"; + name = "comment()"; + next = cur->parent; + + /* + * Thumbler index computation + */ + tmp = cur->prev; + while (tmp != NULL) { + if (tmp->type == XML_COMMENT_NODE) + occur++; + tmp = tmp->prev; + } + if (occur == 0) { + tmp = cur->next; + while (tmp != NULL && occur == 0) { + if (tmp->type == XML_COMMENT_NODE) + occur++; + tmp = tmp->next; + } + if (occur != 0) + occur = 1; + } else + occur++; + } else if ((cur->type == XML_TEXT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE)) { + sep = "/"; + name = "text()"; + next = cur->parent; + + /* + * Thumbler index computation + */ + tmp = cur->prev; + while (tmp != NULL) { + if ((cur->type == XML_TEXT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE)) + occur++; + tmp = tmp->prev; + } + if (occur == 0) { + tmp = cur->next; + while (tmp != NULL && occur == 0) { + if ((cur->type == XML_TEXT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE)) + occur++; + tmp = tmp->next; + } + if (occur != 0) + occur = 1; + } else + occur++; + } else if (cur->type == XML_PI_NODE) { + sep = "/"; + snprintf(nametemp, sizeof(nametemp) - 1, + "processing-instruction('%s')", cur->name); + nametemp[sizeof(nametemp) - 1] = 0; + name = nametemp; + + next = cur->parent; + + /* + * Thumbler index computation + */ + tmp = cur->prev; + while (tmp != NULL) { + if ((tmp->type == XML_PI_NODE) && + (xmlStrEqual(cur->name, tmp->name))) + occur++; + tmp = tmp->prev; + } + if (occur == 0) { + tmp = cur->next; + while (tmp != NULL && occur == 0) { + if ((tmp->type == XML_PI_NODE) && + (xmlStrEqual(cur->name, tmp->name))) + occur++; + tmp = tmp->next; + } + if (occur != 0) 
+ occur = 1; + } else + occur++; + + } else if (cur->type == XML_ATTRIBUTE_NODE) { + sep = "/@"; + name = (const char *) (((xmlAttrPtr) cur)->name); + next = ((xmlAttrPtr) cur)->parent; + } else { + next = cur->parent; + } + + /* + * Make sure there is enough room + */ + if (xmlStrlen(buffer) + sizeof(nametemp) + 20 > buf_len) { + buf_len = + 2 * buf_len + xmlStrlen(buffer) + sizeof(nametemp) + 20; + temp = (xmlChar *) xmlRealloc(buffer, buf_len); + if (temp == NULL) { + xmlFree(buf); + xmlFree(buffer); + return (NULL); + } + buffer = temp; + temp = (xmlChar *) xmlRealloc(buf, buf_len); + if (temp == NULL) { + xmlFree(buf); + xmlFree(buffer); + return (NULL); + } + buf = temp; + } + if (occur == 0) + snprintf((char *) buf, buf_len, "%s%s%s", + sep, name, (char *) buffer); + else + snprintf((char *) buf, buf_len, "%s%s[%d]%s", + sep, name, occur, (char *) buffer); + snprintf((char *) buffer, buf_len, "%s", buf); + cur = next; + } while (cur != NULL); + xmlFree(buf); + return (buffer); +} + +/** + * xmlDocGetRootElement: + * @doc: the document + * + * Get the root element of the document (doc->children is a list + * containing possibly comments, PIs, etc ...). + * + * Returns the #xmlNodePtr for the root or NULL + */ +xmlNodePtr +xmlDocGetRootElement(xmlDocPtr doc) { + xmlNodePtr ret; + + if (doc == NULL) return(NULL); + ret = doc->children; + while (ret != NULL) { + if (ret->type == XML_ELEMENT_NODE) + return(ret); + ret = ret->next; + } + return(ret); +} + +/** + * xmlDocSetRootElement: + * @doc: the document + * @root: the new document root element + * + * Set the root element of the document (doc->children is a list + * containing possibly comments, PIs, etc ...). 
+ * + * Returns the old root element if any was found + */ +xmlNodePtr +xmlDocSetRootElement(xmlDocPtr doc, xmlNodePtr root) { + xmlNodePtr old = NULL; + + if (doc == NULL) return(NULL); + if (root == NULL) + return(NULL); + xmlUnlinkNode(root); + root->doc = doc; + root->parent = (xmlNodePtr) doc; + old = doc->children; + while (old != NULL) { + if (old->type == XML_ELEMENT_NODE) + break; + old = old->next; + } + if (old == NULL) { + if (doc->children == NULL) { + doc->children = root; + doc->last = root; + } else { + xmlAddSibling(doc->children, root); + } + } else { + xmlReplaceNode(old, root); + } + return(old); +} + +/** + * xmlNodeSetLang: + * @cur: the node being changed + * @lang: the language description + * + * Set the language of a node, i.e. the values of the xml:lang + * attribute. + */ +void +xmlNodeSetLang(xmlNodePtr cur, const xmlChar *lang) { + xmlNsPtr ns; + + if (cur == NULL) return; + switch(cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_PI_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_NAMESPACE_DECL: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return; + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + break; + } + ns = xmlSearchNsByHref(cur->doc, cur, XML_XML_NAMESPACE); + if (ns == NULL) + return; + xmlSetNsProp(cur, ns, BAD_CAST "lang", lang); +} + +/** + * xmlNodeGetLang: + * @cur: the node being checked + * + * Searches the language of a node, i.e. the values of the xml:lang + * attribute or the one carried by the nearest ancestor. 
+ * + * Returns a pointer to the lang value, or NULL if not found + * It's up to the caller to free the memory with xmlFree(). + */ +xmlChar * +xmlNodeGetLang(xmlNodePtr cur) { + xmlChar *lang; + + while (cur != NULL) { + lang = xmlGetNsProp(cur, BAD_CAST "lang", XML_XML_NAMESPACE); + if (lang != NULL) + return(lang); + cur = cur->parent; + } + return(NULL); +} + + +/** + * xmlNodeSetSpacePreserve: + * @cur: the node being changed + * @val: the xml:space value ("0": default, 1: "preserve") + * + * Set (or reset) the space preserving behaviour of a node, i.e. the + * value of the xml:space attribute. + */ +void +xmlNodeSetSpacePreserve(xmlNodePtr cur, int val) { + xmlNsPtr ns; + + if (cur == NULL) return; + switch(cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_PI_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return; + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + break; + } + ns = xmlSearchNsByHref(cur->doc, cur, XML_XML_NAMESPACE); + if (ns == NULL) + return; + switch (val) { + case 0: + xmlSetNsProp(cur, ns, BAD_CAST "space", BAD_CAST "default"); + break; + case 1: + xmlSetNsProp(cur, ns, BAD_CAST "space", BAD_CAST "preserve"); + break; + } +} + +/** + * xmlNodeGetSpacePreserve: + * @cur: the node being checked + * + * Searches the space preserving behaviour of a node, i.e. the values + * of the xml:space attribute or the one carried by the nearest + * ancestor. 
+ * + * Returns -1 if xml:space is not inherited, 0 if "default", 1 if "preserve" + */ +int +xmlNodeGetSpacePreserve(xmlNodePtr cur) { + xmlChar *space; + + while (cur != NULL) { + space = xmlGetNsProp(cur, BAD_CAST "space", XML_XML_NAMESPACE); + if (space != NULL) { + if (xmlStrEqual(space, BAD_CAST "preserve")) { + xmlFree(space); + return(1); + } + if (xmlStrEqual(space, BAD_CAST "default")) { + xmlFree(space); + return(0); + } + xmlFree(space); + } + cur = cur->parent; + } + return(-1); +} + +/** + * xmlNodeSetName: + * @cur: the node being changed + * @name: the new tag name + * + * Set (or reset) the name of a node. + */ +void +xmlNodeSetName(xmlNodePtr cur, const xmlChar *name) { + if (cur == NULL) return; + if (name == NULL) return; + switch(cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return; + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + case XML_PI_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_DTD_NODE: + case XML_DOCUMENT_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + break; + } + if (cur->name != NULL) xmlFree((xmlChar *) cur->name); + cur->name = xmlStrdup(name); +} + +/** + * xmlNodeSetBase: + * @cur: the node being changed + * @uri: the new base URI + * + * Set (or reset) the base URI of a node, i.e. the value of the + * xml:base attribute. 
+ */ +void +xmlNodeSetBase(xmlNodePtr cur, xmlChar* uri) { + xmlNsPtr ns; + + if (cur == NULL) return; + switch(cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_PI_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return; + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + break; + case XML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + case XML_HTML_DOCUMENT_NODE: { + xmlDocPtr doc = (xmlDocPtr) cur; + + if (doc->URL != NULL) + xmlFree((xmlChar *) doc->URL); + if (uri == NULL) + doc->URL = NULL; + else + doc->URL = xmlStrdup(uri); + return; + } + } + + ns = xmlSearchNsByHref(cur->doc, cur, XML_XML_NAMESPACE); + if (ns == NULL) + return; + xmlSetNsProp(cur, ns, BAD_CAST "base", uri); +} + +/** + * xmlNodeGetBase: + * @doc: the document the node pertains to + * @cur: the node being checked + * + * Searches for the BASE URL. The code should work on both XML + * and HTML document even if base mechanisms are completely different. + * It returns the base as defined in RFC 2396 sections + * 5.1.1. Base URI within Document Content + * and + * 5.1.2. Base URI from the Encapsulating Entity + * However it does not return the document base (5.1.3), use + * xmlDocumentGetBase() for this + * + * Returns a pointer to the base URL, or NULL if not found + * It's up to the caller to free the memory with xmlFree(). 
+ */ +xmlChar * +xmlNodeGetBase(xmlDocPtr doc, xmlNodePtr cur) { + xmlChar *oldbase = NULL; + xmlChar *base, *newbase; + + if ((cur == NULL) && (doc == NULL)) + return(NULL); + if (doc == NULL) doc = cur->doc; + if ((doc != NULL) && (doc->type == XML_HTML_DOCUMENT_NODE)) { + cur = doc->children; + while ((cur != NULL) && (cur->name != NULL)) { + if (cur->type != XML_ELEMENT_NODE) { + cur = cur->next; + continue; + } + if (!xmlStrcasecmp(cur->name, BAD_CAST "html")) { + cur = cur->children; + continue; + } + if (!xmlStrcasecmp(cur->name, BAD_CAST "head")) { + cur = cur->children; + continue; + } + if (!xmlStrcasecmp(cur->name, BAD_CAST "base")) { + return(xmlGetProp(cur, BAD_CAST "href")); + } + cur = cur->next; + } + return(NULL); + } + while (cur != NULL) { + if (cur->type == XML_ENTITY_DECL) { + xmlEntityPtr ent = (xmlEntityPtr) cur; + return(xmlStrdup(ent->URI)); + } + if (cur->type == XML_ELEMENT_NODE) { + base = xmlGetNsProp(cur, BAD_CAST "base", XML_XML_NAMESPACE); + if (base != NULL) { + if (oldbase != NULL) { + newbase = xmlBuildURI(oldbase, base); + if (newbase != NULL) { + xmlFree(oldbase); + xmlFree(base); + oldbase = newbase; + } else { + xmlFree(oldbase); + xmlFree(base); + return(NULL); + } + } else { + oldbase = base; + } + if ((!xmlStrncmp(oldbase, BAD_CAST "http://", 7)) || + (!xmlStrncmp(oldbase, BAD_CAST "ftp://", 6)) || + (!xmlStrncmp(oldbase, BAD_CAST "urn:", 4))) + return(oldbase); + } + } + cur = cur->parent; + } + if ((doc != NULL) && (doc->URL != NULL)) { + if (oldbase == NULL) + return(xmlStrdup(doc->URL)); + newbase = xmlBuildURI(oldbase, doc->URL); + xmlFree(oldbase); + return(newbase); + } + return(oldbase); +} + +/** + * xmlNodeGetContent: + * @cur: the node being read + * + * Read the value of a node, this can be either the text carried + * directly by this node if it's a TEXT node or the aggregate string + * of the values carried by this node child's (TEXT and ENTITY_REF). + * Entity references are substituted. 
+ * Returns a new #xmlChar * or NULL if no content is available. + * It's up to the caller to free the memory with xmlFree(). + */ +xmlChar * +xmlNodeGetContent(xmlNodePtr cur) +{ + if (cur == NULL) + return (NULL); + switch (cur->type) { + case XML_DOCUMENT_FRAG_NODE: + case XML_ELEMENT_NODE:{ + xmlNodePtr tmp = cur; + xmlBufferPtr buffer; + xmlChar *ret; + + buffer = xmlBufferCreate(); + if (buffer == NULL) + return (NULL); + while (tmp != NULL) { + switch (tmp->type) { + case XML_CDATA_SECTION_NODE: + case XML_TEXT_NODE: + if (tmp->content != NULL) + xmlBufferCat(buffer, tmp->content); + break; + case XML_ENTITY_REF_NODE:{ + /* recursive substitution of entity references */ + xmlChar *cont = xmlNodeGetContent(tmp); + + if (cont) { + xmlBufferCat(buffer, + (const xmlChar *) cont); + xmlFree(cont); + } + break; + } + default: + break; + } + /* + * Skip to next node + */ + if (tmp->children != NULL) { + if (tmp->children->type != XML_ENTITY_DECL) { + tmp = tmp->children; + continue; + } + } + if (tmp == cur) + break; + + if (tmp->next != NULL) { + tmp = tmp->next; + continue; + } + + do { + tmp = tmp->parent; + if (tmp == NULL) + break; + if (tmp == cur) { + tmp = NULL; + break; + } + if (tmp->next != NULL) { + tmp = tmp->next; + break; + } + } while (tmp != NULL); + } + ret = buffer->content; + buffer->content = NULL; + xmlBufferFree(buffer); + return (ret); + } + case XML_ATTRIBUTE_NODE:{ + xmlAttrPtr attr = (xmlAttrPtr) cur; + + if (attr->parent != NULL) + return (xmlNodeListGetString + (attr->parent->doc, attr->children, 1)); + else + return (xmlNodeListGetString(NULL, attr->children, 1)); + break; + } + case XML_COMMENT_NODE: + case XML_PI_NODE: + if (cur->content != NULL) + return (xmlStrdup(cur->content)); + return (NULL); + case XML_ENTITY_REF_NODE:{ + xmlEntityPtr ent; + xmlNodePtr tmp; + xmlBufferPtr buffer; + xmlChar *ret; + + /* lookup entity declaration */ + ent = xmlGetDocEntity(cur->doc, cur->name); + if (ent == NULL) + return (NULL); + + buffer = 
xmlBufferCreate(); + if (buffer == NULL) + return (NULL); + + /* an entity content can be any "well balanced chunk", + * i.e. the result of the content [43] production: + * http://www.w3.org/TR/REC-xml#NT-content + * -> we iterate through child nodes and recursive call + * xmlNodeGetContent() which handles all possible node types */ + tmp = ent->children; + while (tmp) { + xmlChar *cont = xmlNodeGetContent(tmp); + + if (cont) { + xmlBufferCat(buffer, (const xmlChar *) cont); + xmlFree(cont); + } + tmp = tmp->next; + } + + ret = buffer->content; + buffer->content = NULL; + xmlBufferFree(buffer); + return (ret); + } + case XML_ENTITY_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return (NULL); + case XML_NAMESPACE_DECL: { + xmlChar *tmp; + + tmp = xmlStrdup(((xmlNsPtr) cur)->href); + return (tmp); + } + case XML_ELEMENT_DECL: + /* TODO !!! */ + return (NULL); + case XML_ATTRIBUTE_DECL: + /* TODO !!! */ + return (NULL); + case XML_ENTITY_DECL: + /* TODO !!! */ + return (NULL); + case XML_CDATA_SECTION_NODE: + case XML_TEXT_NODE: + if (cur->content != NULL) + return (xmlStrdup(cur->content)); + return (NULL); + } + return (NULL); +} +/** + * xmlNodeSetContent: + * @cur: the node being modified + * @content: the new value of the content + * + * Replace the content of a node. 
+ */ +void +xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeSetContent : node == NULL\n"); +#endif + return; + } + switch (cur->type) { + case XML_DOCUMENT_FRAG_NODE: + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + if (cur->children != NULL) xmlFreeNodeList(cur->children); + cur->children = xmlStringGetNodeList(cur->doc, content); + UPDATE_LAST_CHILD_AND_PARENT(cur) + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + if (cur->content != NULL) { + xmlFree(cur->content); + } + if (cur->children != NULL) xmlFreeNodeList(cur->children); + cur->last = cur->children = NULL; + if (content != NULL) { + cur->content = xmlStrdup(content); + } else + cur->content = NULL; + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + break; + case XML_NOTATION_NODE: + break; + case XML_DTD_NODE: + break; + case XML_NAMESPACE_DECL: + break; + case XML_ELEMENT_DECL: + /* TODO !!! */ + break; + case XML_ATTRIBUTE_DECL: + /* TODO !!! */ + break; + case XML_ENTITY_DECL: + /* TODO !!! */ + break; + } +} + +/** + * xmlNodeSetContentLen: + * @cur: the node being modified + * @content: the new value of the content + * @len: the size of @content + * + * Replace the content of a node. 
+ */ +void +xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeSetContentLen : node == NULL\n"); +#endif + return; + } + switch (cur->type) { + case XML_DOCUMENT_FRAG_NODE: + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + if (cur->children != NULL) xmlFreeNodeList(cur->children); + cur->children = xmlStringLenGetNodeList(cur->doc, content, len); + UPDATE_LAST_CHILD_AND_PARENT(cur) + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + if (cur->content != NULL) { + xmlFree(cur->content); + } + if (cur->children != NULL) xmlFreeNodeList(cur->children); + cur->children = cur->last = NULL; + if (content != NULL) { + cur->content = xmlStrndup(content, len); + } else + cur->content = NULL; + break; + case XML_DOCUMENT_NODE: + case XML_DTD_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + break; + case XML_ELEMENT_DECL: + /* TODO !!! */ + break; + case XML_ATTRIBUTE_DECL: + /* TODO !!! */ + break; + case XML_ENTITY_DECL: + /* TODO !!! */ + break; + } +} + +/** + * xmlNodeAddContentLen: + * @cur: the node being modified + * @content: extra content + * @len: the size of @content + * + * Append the extra substring to the node content. 
+ */ +void +xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeAddContentLen : node == NULL\n"); +#endif + return; + } + if (len <= 0) return; + switch (cur->type) { + case XML_DOCUMENT_FRAG_NODE: + case XML_ELEMENT_NODE: { + xmlNodePtr last, newNode, tmp; + + last = cur->last; + newNode = xmlNewTextLen(content, len); + if (newNode != NULL) { + tmp = xmlAddChild(cur, newNode); + if (tmp != newNode) + return; + if ((last != NULL) && (last->next == newNode)) { + xmlTextMerge(last, newNode); + } + } + break; + } + case XML_ATTRIBUTE_NODE: + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + if (content != NULL) { + cur->content = xmlStrncat(cur->content, content, len); + } + case XML_DOCUMENT_NODE: + case XML_DTD_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + break; + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + break; + } +} + +/** + * xmlNodeAddContent: + * @cur: the node being modified + * @content: extra content + * + * Append the extra substring to the node content. 
+ */ +void +xmlNodeAddContent(xmlNodePtr cur, const xmlChar *content) { + int len; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeAddContent : node == NULL\n"); +#endif + return; + } + if (content == NULL) return; + len = xmlStrlen(content); + xmlNodeAddContentLen(cur, content, len); +} + +/** + * xmlTextMerge: + * @first: the first text node + * @second: the second text node being merged + * + * Merge two text nodes into one + * Returns the first text node augmented + */ +xmlNodePtr +xmlTextMerge(xmlNodePtr first, xmlNodePtr second) { + if (first == NULL) return(second); + if (second == NULL) return(first); + if (first->type != XML_TEXT_NODE) return(first); + if (second->type != XML_TEXT_NODE) return(first); + if (second->name != first->name) + return(first); + xmlNodeAddContent(first, second->content); + xmlUnlinkNode(second); + xmlFreeNode(second); + return(first); +} + +/** + * xmlGetNsList: + * @doc: the document + * @node: the current node + * + * Search all the namespace applying to a given element. 
+ * Returns an NULL terminated array of all the #xmlNsPtr found + * that need to be freed by the caller or NULL if no + * namespace if defined + */ +xmlNsPtr * +xmlGetNsList(xmlDocPtr doc ATTRIBUTE_UNUSED, xmlNodePtr node) +{ + xmlNsPtr cur; + xmlNsPtr *ret = NULL; + int nbns = 0; + int maxns = 10; + int i; + + while (node != NULL) { + if (node->type == XML_ELEMENT_NODE) { + cur = node->nsDef; + while (cur != NULL) { + if (ret == NULL) { + ret = + (xmlNsPtr *) xmlMalloc((maxns + 1) * + sizeof(xmlNsPtr)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlGetNsList : out of memory!\n"); + return (NULL); + } + ret[nbns] = NULL; + } + for (i = 0; i < nbns; i++) { + if ((cur->prefix == ret[i]->prefix) || + (xmlStrEqual(cur->prefix, ret[i]->prefix))) + break; + } + if (i >= nbns) { + if (nbns >= maxns) { + maxns *= 2; + ret = (xmlNsPtr *) xmlRealloc(ret, + (maxns + + 1) * + sizeof(xmlNsPtr)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlGetNsList : realloc failed!\n"); + return (NULL); + } + } + ret[nbns++] = cur; + ret[nbns] = NULL; + } + + cur = cur->next; + } + } + node = node->parent; + } + return (ret); +} + +/** + * xmlSearchNs: + * @doc: the document + * @node: the current node + * @nameSpace: the namespace prefix + * + * Search a Ns registered under a given name space for a document. + * recurse on the parents until it finds the defined namespace + * or return NULL otherwise. + * @nameSpace can be NULL, this is a search for the default namespace. + * We don't allow to cross entities boundaries. If you don't declare + * the namespace within those you will be in troubles !!! A warning + * is generated to cover this case. + * + * Returns the namespace pointer or NULL. 
+ */ +xmlNsPtr +xmlSearchNs(xmlDocPtr doc, xmlNodePtr node, const xmlChar *nameSpace) { + xmlNsPtr cur; + + if (node == NULL) return(NULL); + if ((nameSpace != NULL) && + (xmlStrEqual(nameSpace, (const xmlChar *)"xml"))) { + if ((doc == NULL) && (node->type == XML_ELEMENT_NODE)) { + /* + * The XML-1.0 namespace is normally held on the root + * element. In this case exceptionally create it on the + * node element. + */ + cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSearchNs : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNs)); + cur->type = XML_LOCAL_NAMESPACE; + cur->href = xmlStrdup(XML_XML_NAMESPACE); + cur->prefix = xmlStrdup((const xmlChar *)"xml"); + cur->next = node->nsDef; + node->nsDef = cur; + return(cur); + } + if (doc->oldNs == NULL) { + /* + * Allocate a new Namespace and fill the fields. + */ + doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); + if (doc->oldNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSearchNs : malloc failed\n"); + return(NULL); + } + memset(doc->oldNs, 0, sizeof(xmlNs)); + doc->oldNs->type = XML_LOCAL_NAMESPACE; + + doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE); + doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml"); + } + return(doc->oldNs); + } + while (node != NULL) { + if ((node->type == XML_ENTITY_REF_NODE) || + (node->type == XML_ENTITY_NODE) || + (node->type == XML_ENTITY_DECL)) + return(NULL); + if (node->type == XML_ELEMENT_NODE) { + cur = node->nsDef; + while (cur != NULL) { + if ((cur->prefix == NULL) && (nameSpace == NULL) && + (cur->href != NULL)) + return(cur); + if ((cur->prefix != NULL) && (nameSpace != NULL) && + (cur->href != NULL) && + (xmlStrEqual(cur->prefix, nameSpace))) + return(cur); + cur = cur->next; + } + } + node = node->parent; + } + return(NULL); +} + +/** + * xmlSearchNsByHref: + * @doc: the document + * @node: the current node + * @href: the namespace value + * + * Search a Ns aliasing a given 
URI. Recurse on the parents until it finds
 * the defined namespace or return NULL otherwise.
 * Returns the namespace pointer or NULL.
 */
xmlNsPtr
xmlSearchNsByHref(xmlDocPtr doc, xmlNodePtr node, const xmlChar *href) {
    xmlNsPtr cur;
    xmlNodePtr orig = node;

    if ((node == NULL) || (href == NULL)) return(NULL);
    if (xmlStrEqual(href, XML_XML_NAMESPACE)) {
        /*
         * Only the document can hold the XML spec namespace.
         */
        if ((doc == NULL) && (node->type == XML_ELEMENT_NODE)) {
            /*
             * The XML-1.0 namespace is normally held on the root
             * element. In this case exceptionally create it on the
             * node element.
             */
            cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
            if (cur == NULL) {
                xmlGenericError(xmlGenericErrorContext,
                        "xmlSearchNsByHref : malloc failed\n");
                return(NULL);
            }
            memset(cur, 0, sizeof(xmlNs));
            cur->type = XML_LOCAL_NAMESPACE;
            cur->href = xmlStrdup(XML_XML_NAMESPACE);
            cur->prefix = xmlStrdup((const xmlChar *)"xml");
            cur->next = node->nsDef;
            node->nsDef = cur;
            return(cur);
        }
        /*
         * No document was given and the node is not an element: fall
         * back on the document of the node, and give up rather than
         * dereference a NULL document if there is none either.
         */
        if (doc == NULL) {
            doc = node->doc;
            if (doc == NULL)
                return(NULL);
        }
        if (doc->oldNs == NULL) {
            /*
             * Allocate a new Namespace and fill the fields.
             */
            doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
            if (doc->oldNs == NULL) {
                xmlGenericError(xmlGenericErrorContext,
                        "xmlSearchNsByHref : malloc failed\n");
                return(NULL);
            }
            memset(doc->oldNs, 0, sizeof(xmlNs));
            doc->oldNs->type = XML_LOCAL_NAMESPACE;

            doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE);
            doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml");
        }
        return(doc->oldNs);
    }

    for (; node != NULL; node = node->parent) {
        /*
         * nsDef is only inspected on element nodes, as xmlSearchNs()
         * does; the walk may start on or reach nodes of other kinds
         * (an attribute, the document) which are accessed through the
         * same pointer type.
         */
        if (node->type != XML_ELEMENT_NODE)
            continue;
        for (cur = node->nsDef; cur != NULL; cur = cur->next) {
            xmlNodePtr check;
            xmlNsPtr tst;
            int shadowed = 0;

            if ((cur->href == NULL) || (!xmlStrEqual(cur->href, href)))
                continue;

            /*
             * Check that the prefix is not shadowed between orig and node
             */
            for (check = orig;
                 (check != NULL) && (check != node) && (!shadowed);
                 check = check->parent) {
                if (check->type != XML_ELEMENT_NODE)
                    continue;
                for (tst = check->nsDef; tst != NULL; tst = tst->next) {
                    if (((tst->prefix == NULL) && (cur->prefix == NULL)) ||
                        ((tst->prefix != NULL) && (cur->prefix != NULL) &&
                         (xmlStrEqual(tst->prefix, cur->prefix)))) {
                        shadowed = 1;
                        break;
                    }
                }
            }
            if (!shadowed)
                return(cur);
        }
    }
    return(NULL);
}

/**
 * xmlNewReconciliedNs:
 * @doc:  the document
 * @tree:  a node expected to hold the new namespace
 * @ns:  the original namespace
 *
 * This function tries to locate a namespace definition in a tree
 * ancestors, or create a new namespace definition node similar to
 * @ns trying to reuse the same prefix. However if the given prefix is
 * null (default namespace) or reused within the subtree defined by
 * @tree or on one of its ancestors then a new prefix is generated.
+ * Returns the (new) namespace definition or NULL in case of error + */ +xmlNsPtr +xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns) { + xmlNsPtr def; + xmlChar prefix[50]; + int counter = 1; + + if (tree == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewReconciliedNs : tree == NULL\n"); +#endif + return(NULL); + } + if (ns == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNewReconciliedNs : ns == NULL\n"); +#endif + return(NULL); + } + /* + * Search an existing namespace definition inherited. + */ + def = xmlSearchNsByHref(doc, tree, ns->href); + if (def != NULL) + return(def); + + /* + * Find a close prefix which is not already in use. + * Let's strip namespace prefixes longer than 20 chars ! + */ + if (ns->prefix == NULL) + snprintf((char *) prefix, sizeof(prefix), "default"); + else + snprintf((char *) prefix, sizeof(prefix), "%.20s", ns->prefix); + + def = xmlSearchNs(doc, tree, prefix); + while (def != NULL) { + if (counter > 1000) return(NULL); + if (ns->prefix == NULL) + snprintf((char *) prefix, sizeof(prefix), "default%d", counter++); + else + snprintf((char *) prefix, sizeof(prefix), "%.20s%d", ns->prefix, counter++); + def = xmlSearchNs(doc, tree, prefix); + } + + /* + * OK, now we are ready to create a new one. + */ + def = xmlNewNs(tree, ns->href, prefix); + return(def); +} + +/** + * xmlReconciliateNs: + * @doc: the document + * @tree: a node defining the subtree to reconciliate + * + * This function checks that all the namespaces declared within the given + * tree are properly declared. This is needed for example after Copy or Cut + * and then paste operations. The subtree may still hold pointers to + * namespace declarations outside the subtree or invalid/masked. As much + * as possible the function try to reuse the existing namespaces found in + * the new environment. If not possible the new namespaces are redeclared + * on @tree at the top of the given subtree. 
+ * Returns the number of namespace declarations created or -1 in case of error. + */ +int +xmlReconciliateNs(xmlDocPtr doc, xmlNodePtr tree) { + xmlNsPtr *oldNs = NULL; + xmlNsPtr *newNs = NULL; + int sizeCache = 0; + int nbCache = 0; + + xmlNsPtr n; + xmlNodePtr node = tree; + xmlAttrPtr attr; + int ret = 0, i; + + while (node != NULL) { + /* + * Reconciliate the node namespace + */ + if (node->ns != NULL) { + /* + * initialize the cache if needed + */ + if (sizeCache == 0) { + sizeCache = 10; + oldNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + return(-1); + } + newNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + for (i = 0;i < nbCache;i++) { + if (oldNs[i] == node->ns) { + node->ns = newNs[i]; + break; + } + } + if (i == nbCache) { + /* + * OK we need to recreate a new namespace definition + */ + n = xmlNewReconciliedNs(doc, tree, node->ns); + if (n != NULL) { /* :-( what if else ??? */ + /* + * check if we need to grow the cache buffers. + */ + if (sizeCache <= nbCache) { + sizeCache *= 2; + oldNs = (xmlNsPtr *) xmlRealloc(oldNs, sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(newNs); + return(-1); + } + newNs = (xmlNsPtr *) xmlRealloc(newNs, sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + newNs[nbCache] = n; + oldNs[nbCache++] = node->ns; + node->ns = n; + } + } + } + /* + * now check for namespace hold by attributes on the node. 
+ */ + attr = node->properties; + while (attr != NULL) { + if (attr->ns != NULL) { + /* + * initialize the cache if needed + */ + if (sizeCache == 0) { + sizeCache = 10; + oldNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + return(-1); + } + newNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + for (i = 0;i < nbCache;i++) { + if (oldNs[i] == attr->ns) { + attr->ns = newNs[i]; + break; + } + } + if (i == nbCache) { + /* + * OK we need to recreate a new namespace definition + */ + n = xmlNewReconciliedNs(doc, tree, attr->ns); + if (n != NULL) { /* :-( what if else ??? */ + /* + * check if we need to grow the cache buffers. + */ + if (sizeCache <= nbCache) { + sizeCache *= 2; + oldNs = (xmlNsPtr *) xmlRealloc(oldNs, sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(newNs); + return(-1); + } + newNs = (xmlNsPtr *) xmlRealloc(newNs, sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + newNs[nbCache] = n; + oldNs[nbCache++] = attr->ns; + attr->ns = n; + } + } + } + attr = attr->next; + } + + /* + * Browse the full subtree, deep first + */ + if (node->children != NULL) { + /* deep first */ + node = node->children; + } else if ((node != tree) && (node->next != NULL)) { + /* then siblings */ + node = node->next; + } else if (node != tree) { + /* go up to parents->next if needed */ + while (node != tree) { + if (node->parent != NULL) + node = node->parent; + if ((node != tree) && (node->next != NULL)) { + node = node->next; + break; + } + if (node->parent == NULL) { + node = NULL; + 
break; + } + } + /* exit condition */ + if (node == tree) + node = NULL; + } else + break; + } + if (oldNs != NULL) + xmlFree(oldNs); + if (newNs != NULL) + xmlFree(newNs); + return(ret); +} + +/** + * xmlHasProp: + * @node: the node + * @name: the attribute name + * + * Search an attribute associated to a node + * This function also looks in DTD attribute declaration for #FIXED or + * default declaration values unless DTD use has been turned off. + * + * Returns the attribute or the attribute declaration or NULL if + * neither was found. + */ +xmlAttrPtr +xmlHasProp(xmlNodePtr node, const xmlChar *name) { + xmlAttrPtr prop; + xmlDocPtr doc; + + if ((node == NULL) || (name == NULL)) return(NULL); + /* + * Check on the properties attached to the node + */ + prop = node->properties; + while (prop != NULL) { + if (xmlStrEqual(prop->name, name)) { + return(prop); + } + prop = prop->next; + } + if (!xmlCheckDTD) return(NULL); + + /* + * Check if there is a default declaration in the internal + * or external subsets + */ + doc = node->doc; + if (doc != NULL) { + xmlAttributePtr attrDecl; + if (doc->intSubset != NULL) { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, node->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, node->name, name); + if (attrDecl != NULL) + return((xmlAttrPtr) attrDecl); + } + } + return(NULL); +} + +/** + * xmlHasNsProp: + * @node: the node + * @name: the attribute name + * @nameSpace: the URI of the namespace + * + * Search for an attribute associated to a node + * This attribute has to be anchored in the namespace specified. + * This does the entity substitution. + * This function looks in DTD attribute declaration for #FIXED or + * default declaration values unless DTD use has been turned off. + * + * Returns the attribute or the attribute declaration or NULL + * if neither was found. 
+ */ +xmlAttrPtr +xmlHasNsProp(xmlNodePtr node, const xmlChar *name, const xmlChar *nameSpace) { + xmlAttrPtr prop; + xmlDocPtr doc; + + if (node == NULL) + return(NULL); + + prop = node->properties; + if (nameSpace == NULL) + return(xmlHasProp(node, name)); + while (prop != NULL) { + /* + * One need to have + * - same attribute names + * - and the attribute carrying that namespace + */ + if ((xmlStrEqual(prop->name, name)) && + ((prop->ns != NULL) && (xmlStrEqual(prop->ns->href, nameSpace)))) { + return(prop); + } + prop = prop->next; + } + if (!xmlCheckDTD) return(NULL); + + /* + * Check if there is a default declaration in the internal + * or external subsets + */ + doc = node->doc; + if (doc != NULL) { + if (doc->intSubset != NULL) { + xmlAttributePtr attrDecl = NULL; + xmlNsPtr *nsList, *cur; + xmlChar *ename; + + nsList = xmlGetNsList(node->doc, node); + if (nsList == NULL) + return(NULL); + if ((node->ns != NULL) && (node->ns->prefix != NULL)) { + ename = xmlStrdup(node->ns->prefix); + ename = xmlStrcat(ename, BAD_CAST ":"); + ename = xmlStrcat(ename, node->name); + } else { + ename = xmlStrdup(node->name); + } + if (ename == NULL) { + xmlFree(nsList); + return(NULL); + } + + cur = nsList; + while (*cur != NULL) { + if (xmlStrEqual((*cur)->href, nameSpace)) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, ename, + name, (*cur)->prefix); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, ename, + name, (*cur)->prefix); + } + cur++; + } + xmlFree(nsList); + xmlFree(ename); + return((xmlAttrPtr) attrDecl); + } + } + return(NULL); +} + +/** + * xmlGetProp: + * @node: the node + * @name: the attribute name + * + * Search and get the value of an attribute associated to a node + * This does the entity substitution. + * This function looks in DTD attribute declaration for #FIXED or + * default declaration values unless DTD use has been turned off. + * + * Returns the attribute value or NULL if not found. 
+ * It's up to the caller to free the memory with xmlFree(). + */ +xmlChar * +xmlGetProp(xmlNodePtr node, const xmlChar *name) { + xmlAttrPtr prop; + xmlDocPtr doc; + + if ((node == NULL) || (name == NULL)) return(NULL); + /* + * Check on the properties attached to the node + */ + prop = node->properties; + while (prop != NULL) { + if (xmlStrEqual(prop->name, name)) { + xmlChar *ret; + + ret = xmlNodeListGetString(node->doc, prop->children, 1); + if (ret == NULL) return(xmlStrdup((xmlChar *)"")); + return(ret); + } + prop = prop->next; + } + if (!xmlCheckDTD) return(NULL); + + /* + * Check if there is a default declaration in the internal + * or external subsets + */ + doc = node->doc; + if (doc != NULL) { + xmlAttributePtr attrDecl; + if (doc->intSubset != NULL) { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, node->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, node->name, name); + if (attrDecl != NULL) + return(xmlStrdup(attrDecl->defaultValue)); + } + } + return(NULL); +} + +/** + * xmlGetNsProp: + * @node: the node + * @name: the attribute name + * @nameSpace: the URI of the namespace + * + * Search and get the value of an attribute associated to a node + * This attribute has to be anchored in the namespace specified. + * This does the entity substitution. + * This function looks in DTD attribute declaration for #FIXED or + * default declaration values unless DTD use has been turned off. + * + * Returns the attribute value or NULL if not found. + * It's up to the caller to free the memory with xmlFree(). 
+ */ +xmlChar * +xmlGetNsProp(xmlNodePtr node, const xmlChar *name, const xmlChar *nameSpace) { + xmlAttrPtr prop; + xmlDocPtr doc; + xmlNsPtr ns; + + if (node == NULL) + return(NULL); + + prop = node->properties; + if (nameSpace == NULL) + return(xmlGetProp(node, name)); + while (prop != NULL) { + /* + * One need to have + * - same attribute names + * - and the attribute carrying that namespace + */ + if ((xmlStrEqual(prop->name, name)) && + ((prop->ns != NULL) && + (xmlStrEqual(prop->ns->href, nameSpace)))) { + xmlChar *ret; + + ret = xmlNodeListGetString(node->doc, prop->children, 1); + if (ret == NULL) return(xmlStrdup((xmlChar *)"")); + return(ret); + } + prop = prop->next; + } + if (!xmlCheckDTD) return(NULL); + + /* + * Check if there is a default declaration in the internal + * or external subsets + */ + doc = node->doc; + if (doc != NULL) { + if (doc->intSubset != NULL) { + xmlAttributePtr attrDecl; + + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, node->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, node->name, name); + + if ((attrDecl != NULL) && (attrDecl->prefix != NULL)) { + /* + * The DTD declaration only allows a prefix search + */ + ns = xmlSearchNs(doc, node, attrDecl->prefix); + if ((ns != NULL) && (xmlStrEqual(ns->href, nameSpace))) + return(xmlStrdup(attrDecl->defaultValue)); + } + } + } + return(NULL); +} + +/** + * xmlSetProp: + * @node: the node + * @name: the attribute name + * @value: the attribute value + * + * Set (or reset) an attribute carried by a node. + * Returns the attribute pointer. 
+ */ +xmlAttrPtr +xmlSetProp(xmlNodePtr node, const xmlChar *name, const xmlChar *value) { + xmlAttrPtr prop; + xmlDocPtr doc; + + if ((node == NULL) || (name == NULL)) + return(NULL); + doc = node->doc; + prop = node->properties; + while (prop != NULL) { + if ((xmlStrEqual(prop->name, name)) && + (prop->ns == NULL)){ + xmlNodePtr oldprop = prop->children; + + prop->children = NULL; + prop->last = NULL; + if (value != NULL) { + xmlChar *buffer; + xmlNodePtr tmp; + + buffer = xmlEncodeEntitiesReentrant(node->doc, value); + prop->children = xmlStringGetNodeList(node->doc, buffer); + prop->last = NULL; + prop->doc = doc; + tmp = prop->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) prop; + tmp->doc = doc; + if (tmp->next == NULL) + prop->last = tmp; + tmp = tmp->next; + } + xmlFree(buffer); + } + if (oldprop != NULL) + xmlFreeNodeList(oldprop); + return(prop); + } + prop = prop->next; + } + prop = xmlNewProp(node, name, value); + return(prop); +} + +/** + * xmlUnsetProp: + * @node: the node + * @name: the attribute name + * + * Remove an attribute carried by a node. + * Returns 0 if successful, -1 if not found + */ +int +xmlUnsetProp(xmlNodePtr node, const xmlChar *name) { + xmlAttrPtr prop = node->properties, prev = NULL;; + + if ((node == NULL) || (name == NULL)) + return(-1); + while (prop != NULL) { + if ((xmlStrEqual(prop->name, name)) && + (prop->ns == NULL)) { + if (prev == NULL) + node->properties = prop->next; + else + prev->next = prop->next; + xmlFreeProp(prop); + return(0); + } + prev = prop; + prop = prop->next; + } + return(-1); +} + +/** + * xmlSetNsProp: + * @node: the node + * @ns: the namespace definition + * @name: the attribute name + * @value: the attribute value + * + * Set (or reset) an attribute carried by a node. + * The ns structure must be in scope, this is not checked. + * + * Returns the attribute pointer. 
+ */ +xmlAttrPtr +xmlSetNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name, + const xmlChar *value) { + xmlAttrPtr prop; + + if ((node == NULL) || (name == NULL)) + return(NULL); + + if (ns == NULL) + return(xmlSetProp(node, name, value)); + if (ns->href == NULL) + return(NULL); + prop = node->properties; + + while (prop != NULL) { + /* + * One need to have + * - same attribute names + * - and the attribute carrying that namespace + */ + if ((xmlStrEqual(prop->name, name)) && + (prop->ns != NULL) && (xmlStrEqual(prop->ns->href, ns->href))) { + if (prop->children != NULL) + xmlFreeNodeList(prop->children); + prop->children = NULL; + prop->last = NULL; + prop->ns = ns; + if (value != NULL) { + xmlChar *buffer; + xmlNodePtr tmp; + + buffer = xmlEncodeEntitiesReentrant(node->doc, value); + prop->children = xmlStringGetNodeList(node->doc, buffer); + prop->last = NULL; + tmp = prop->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) prop; + if (tmp->next == NULL) + prop->last = tmp; + tmp = tmp->next; + } + xmlFree(buffer); + } + return(prop); + } + prop = prop->next; + } + prop = xmlNewNsProp(node, ns, name, value); + return(prop); +} + +/** + * xmlUnsetNsProp: + * @node: the node + * @ns: the namespace definition + * @name: the attribute name + * + * Remove an attribute carried by a node. 
+ * Returns 0 if successful, -1 if not found + */ +int +xmlUnsetNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name) { + xmlAttrPtr prop = node->properties, prev = NULL;; + + if ((node == NULL) || (name == NULL)) + return(-1); + if (ns == NULL) + return(xmlUnsetProp(node, name)); + if (ns->href == NULL) + return(-1); + while (prop != NULL) { + if ((xmlStrEqual(prop->name, name)) && + (prop->ns != NULL) && (xmlStrEqual(prop->ns->href, ns->href))) { + if (prev == NULL) + node->properties = prop->next; + else + prev->next = prop->next; + xmlFreeProp(prop); + return(0); + } + prev = prop; + prop = prop->next; + } + return(-1); +} + +/** + * xmlNodeIsText: + * @node: the node + * + * Is this node a Text node ? + * Returns 1 yes, 0 no + */ +int +xmlNodeIsText(xmlNodePtr node) { + if (node == NULL) return(0); + + if (node->type == XML_TEXT_NODE) return(1); + return(0); +} + +/** + * xmlIsBlankNode: + * @node: the node + * + * Checks whether this node is an empty or whitespace only + * (and possibly ignorable) text-node. 
+ * + * Returns 1 yes, 0 no + */ +int +xmlIsBlankNode(xmlNodePtr node) { + const xmlChar *cur; + if (node == NULL) return(0); + + if ((node->type != XML_TEXT_NODE) && + (node->type != XML_CDATA_SECTION_NODE)) + return(0); + if (node->content == NULL) return(1); + cur = node->content; + while (*cur != 0) { + if (!IS_BLANK(*cur)) return(0); + cur++; + } + + return(1); +} + +/** + * xmlTextConcat: + * @node: the node + * @content: the content + * @len: @content length + * + * Concat the given string at the end of the existing node content + */ + +void +xmlTextConcat(xmlNodePtr node, const xmlChar *content, int len) { + if (node == NULL) return; + + if ((node->type != XML_TEXT_NODE) && + (node->type != XML_CDATA_SECTION_NODE)) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlTextConcat: node is not text nor CDATA\n"); +#endif + return; + } + node->content = xmlStrncat(node->content, content, len); +} + +/************************************************************************ + * * + * Output : to a FILE or in memory * + * * + ************************************************************************/ + +/** + * xmlBufferCreate: + * + * routine to create an XML buffer. + * returns the new structure. + */ +xmlBufferPtr +xmlBufferCreate(void) { + xmlBufferPtr ret; + + ret = (xmlBufferPtr) xmlMalloc(sizeof(xmlBuffer)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBufferCreate : out of memory!\n"); + return(NULL); + } + ret->use = 0; + ret->size = xmlDefaultBufferSize; + ret->alloc = xmlBufferAllocScheme; + ret->content = (xmlChar *) xmlMalloc(ret->size * sizeof(xmlChar)); + if (ret->content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBufferCreate : out of memory!\n"); + xmlFree(ret); + return(NULL); + } + ret->content[0] = 0; + return(ret); +} + +/** + * xmlBufferCreateSize: + * @size: initial size of buffer + * + * routine to create an XML buffer. + * returns the new structure. 
+ */ +xmlBufferPtr +xmlBufferCreateSize(size_t size) { + xmlBufferPtr ret; + + ret = (xmlBufferPtr) xmlMalloc(sizeof(xmlBuffer)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBufferCreate : out of memory!\n"); + return(NULL); + } + ret->use = 0; + ret->alloc = xmlBufferAllocScheme; + ret->size = (size ? size+2 : 0); /* +1 for ending null */ + if (ret->size){ + ret->content = (xmlChar *) xmlMalloc(ret->size * sizeof(xmlChar)); + if (ret->content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBufferCreate : out of memory!\n"); + xmlFree(ret); + return(NULL); + } + ret->content[0] = 0; + } else + ret->content = NULL; + return(ret); +} + +/** + * xmlBufferSetAllocationScheme: + * @buf: the buffer to tune + * @scheme: allocation scheme to use + * + * Sets the allocation scheme for this buffer + */ +void +xmlBufferSetAllocationScheme(xmlBufferPtr buf, + xmlBufferAllocationScheme scheme) { + if (buf == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferSetAllocationScheme: buf == NULL\n"); +#endif + return; + } + + buf->alloc = scheme; +} + +/** + * xmlBufferFree: + * @buf: the buffer to free + * + * Frees an XML buffer. It frees both the content and the structure which + * encapsulate it. + */ +void +xmlBufferFree(xmlBufferPtr buf) { + if (buf == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferFree: buf == NULL\n"); +#endif + return; + } + if (buf->content != NULL) { + xmlFree(buf->content); + } + xmlFree(buf); +} + +/** + * xmlBufferEmpty: + * @buf: the buffer + * + * empty a buffer. + */ +void +xmlBufferEmpty(xmlBufferPtr buf) { + if (buf->content == NULL) return; + buf->use = 0; + memset(buf->content, 0, buf->size); +} + +/** + * xmlBufferShrink: + * @buf: the buffer to dump + * @len: the number of xmlChar to remove + * + * Remove the beginning of an XML buffer. + * + * Returns the number of #xmlChar removed, or -1 in case of failure. 
+ */ +int +xmlBufferShrink(xmlBufferPtr buf, unsigned int len) { + if (len == 0) return(0); + if (len > buf->use) return(-1); + + buf->use -= len; + memmove(buf->content, &buf->content[len], buf->use * sizeof(xmlChar)); + + buf->content[buf->use] = 0; + return(len); +} + +/** + * xmlBufferGrow: + * @buf: the buffer + * @len: the minimum free size to allocate + * + * Grow the available space of an XML buffer. + * + * Returns the new available space or -1 in case of error + */ +int +xmlBufferGrow(xmlBufferPtr buf, unsigned int len) { + int size; + xmlChar *newbuf; + + if (len + buf->use < buf->size) return(0); + + size = buf->use + len + 100; + + newbuf = (xmlChar *) xmlRealloc(buf->content, size); + if (newbuf == NULL) return(-1); + buf->content = newbuf; + buf->size = size; + return(buf->size - buf->use); +} + +/** + * xmlBufferDump: + * @file: the file output + * @buf: the buffer to dump + * + * Dumps an XML buffer to a FILE *. + * Returns the number of #xmlChar written + */ +int +xmlBufferDump(FILE *file, xmlBufferPtr buf) { + int ret; + + if (buf == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferDump: buf == NULL\n"); +#endif + return(0); + } + if (buf->content == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferDump: buf->content == NULL\n"); +#endif + return(0); + } + if (file == NULL) + file = stdout; + ret = fwrite(buf->content, sizeof(xmlChar), buf->use, file); + return(ret); +} + +/** + * xmlBufferContent: + * @buf: the buffer + * + * Function to extract the content of a buffer + * + * Returns the internal content + */ + +const xmlChar * +xmlBufferContent(const xmlBufferPtr buf) +{ + if(!buf) + return NULL; + + return buf->content; +} + +/** + * xmlBufferLength: + * @buf: the buffer + * + * Function to get the length of a buffer + * + * Returns the length of data in the internal content + */ + +int +xmlBufferLength(const xmlBufferPtr buf) +{ + if(!buf) + return 0; + + return buf->use; +} 
+ +/** + * xmlBufferResize: + * @buf: the buffer to resize + * @size: the desired size + * + * Resize a buffer to accommodate minimum size of @size. + * + * Returns 0 in case of problems, 1 otherwise + */ +int +xmlBufferResize(xmlBufferPtr buf, unsigned int size) +{ + unsigned int newSize; + xmlChar* rebuf = NULL; + + /*take care of empty case*/ + newSize = (buf->size ? buf->size*2 : size); + + /* Don't resize if we don't have to */ + if (size < buf->size) + return 1; + + /* figure out new size */ + switch (buf->alloc){ + case XML_BUFFER_ALLOC_DOUBLEIT: + while (size > newSize) newSize *= 2; + break; + case XML_BUFFER_ALLOC_EXACT: + newSize = size+10; + break; + default: + newSize = size+10; + break; + } + + if (buf->content == NULL) + rebuf = (xmlChar *) xmlMalloc(newSize * sizeof(xmlChar)); + else + rebuf = (xmlChar *) xmlRealloc(buf->content, + newSize * sizeof(xmlChar)); + if (rebuf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBufferResize : out of memory!\n"); + return 0; + } + buf->content = rebuf; + buf->size = newSize; + + return 1; +} + +/** + * xmlBufferAdd: + * @buf: the buffer to dump + * @str: the #xmlChar string + * @len: the number of #xmlChar to add + * + * Add a string range to an XML buffer. if len == -1, the length of + * str is recomputed. 
+ */ +void +xmlBufferAdd(xmlBufferPtr buf, const xmlChar *str, int len) { + unsigned int needSize; + + if (str == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAdd: str == NULL\n"); +#endif + return; + } + if (len < -1) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAdd: len < 0\n"); +#endif + return; + } + if (len == 0) return; + + if (len < 0) + len = xmlStrlen(str); + + if (len <= 0) return; + + needSize = buf->use + len + 2; + if (needSize > buf->size){ + if (!xmlBufferResize(buf, needSize)){ + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAdd : out of memory!\n"); + return; + } + } + + memmove(&buf->content[buf->use], str, len*sizeof(xmlChar)); + buf->use += len; + buf->content[buf->use] = 0; +} + +/** + * xmlBufferAddHead: + * @buf: the buffer + * @str: the #xmlChar string + * @len: the number of #xmlChar to add + * + * Add a string range to the beginning of an XML buffer. + * if len == -1, the length of @str is recomputed. 
+ */ +void +xmlBufferAddHead(xmlBufferPtr buf, const xmlChar *str, int len) { + unsigned int needSize; + + if (str == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAddHead: str == NULL\n"); +#endif + return; + } + if (len < -1) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAddHead: len < 0\n"); +#endif + return; + } + if (len == 0) return; + + if (len < 0) + len = xmlStrlen(str); + + if (len <= 0) return; + + needSize = buf->use + len + 2; + if (needSize > buf->size){ + if (!xmlBufferResize(buf, needSize)){ + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAddHead : out of memory!\n"); + return; + } + } + + memmove(&buf->content[len], &buf->content[0], buf->use * sizeof(xmlChar)); + memmove(&buf->content[0], str, len * sizeof(xmlChar)); + buf->use += len; + buf->content[buf->use] = 0; +} + +/** + * xmlBufferCat: + * @buf: the buffer to dump + * @str: the #xmlChar string + * + * Append a zero terminated string to an XML buffer. + */ +void +xmlBufferCat(xmlBufferPtr buf, const xmlChar *str) { + if (str != NULL) + xmlBufferAdd(buf, str, -1); +} + +/** + * xmlBufferCCat: + * @buf: the buffer to dump + * @str: the C char string + * + * Append a zero terminated C string to an XML buffer. + */ +void +xmlBufferCCat(xmlBufferPtr buf, const char *str) { + const char *cur; + + if (str == NULL) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferCCat: str == NULL\n"); +#endif + return; + } + for (cur = str;*cur != 0;cur++) { + if (buf->use + 10 >= buf->size) { + if (!xmlBufferResize(buf, buf->use+10)){ + xmlGenericError(xmlGenericErrorContext, + "xmlBufferCCat : out of memory!\n"); + return; + } + } + buf->content[buf->use++] = *cur; + } + buf->content[buf->use] = 0; +} + +/** + * xmlBufferWriteCHAR: + * @buf: the XML buffer + * @string: the string to add + * + * routine which manages and grows an output buffer. This one adds + * xmlChars at the end of the buffer. 
+ */ +void +xmlBufferWriteCHAR +(xmlBufferPtr buf, const xmlChar *string) { + xmlBufferCat(buf, string); +} + +/** + * xmlBufferWriteChar: + * @buf: the XML buffer output + * @string: the string to add + * + * routine which manage and grows an output buffer. This one add + * C chars at the end of the array. + */ +void +xmlBufferWriteChar(xmlBufferPtr buf, const char *string) { + xmlBufferCCat(buf, string); +} + + +/** + * xmlBufferWriteQuotedString: + * @buf: the XML buffer output + * @string: the string to add + * + * routine which manage and grows an output buffer. This one writes + * a quoted or double quoted #xmlChar string, checking first if it holds + * quote or double-quotes internally + */ +void +xmlBufferWriteQuotedString(xmlBufferPtr buf, const xmlChar *string) { + if (xmlStrchr(string, '"')) { + if (xmlStrchr(string, '\'')) { +#ifdef DEBUG_BUFFER + xmlGenericError(xmlGenericErrorContext, + "xmlBufferWriteQuotedString: string contains quote and double-quotes !\n"); +#endif + } + xmlBufferCCat(buf, "'"); + xmlBufferCat(buf, string); + xmlBufferCCat(buf, "'"); + } else { + xmlBufferCCat(buf, "\""); + xmlBufferCat(buf, string); + xmlBufferCCat(buf, "\""); + } +} + + +/************************************************************************ + * * + * Dumping XML tree content to a simple buffer * + * * + ************************************************************************/ + +/** + * xmlAttrSerializeContent: + * @buf: the XML buffer output + * @doc: the document + * @attr: the attribute pointer + * + * Serialize the attribute in the buffer + */ +static void +xmlAttrSerializeContent(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr attr) +{ + const xmlChar *cur, *base; + xmlNodePtr children; + + children = attr->children; + while (children != NULL) { + switch (children->type) { + case XML_TEXT_NODE: + base = cur = children->content; + while (*cur != 0) { + if (*cur == '\n') { + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + xmlBufferAdd(buf, BAD_CAST " 
", 5); + cur++; + base = cur; +#if 0 + } else if (*cur == '\'') { + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + xmlBufferAdd(buf, BAD_CAST "'", 6); + cur++; + base = cur; +#endif + } else if (*cur == '"') { + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + xmlBufferAdd(buf, BAD_CAST """, 6); + cur++; + base = cur; + } else if (*cur == '<') { + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + xmlBufferAdd(buf, BAD_CAST "<", 4); + cur++; + base = cur; + } else if (*cur == '>') { + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + xmlBufferAdd(buf, BAD_CAST ">", 4); + cur++; + base = cur; + } else if (*cur == '&') { + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + xmlBufferAdd(buf, BAD_CAST "&", 5); + cur++; + base = cur; + } else if ((*cur >= 0x80) && ((doc == NULL) || + (doc->encoding == + NULL))) { + /* + * We assume we have UTF-8 content. + */ + char tmp[10]; + int val = 0, l = 1; + + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + if (*cur < 0xC0) { + xmlGenericError(xmlGenericErrorContext, + "xmlAttrSerializeContent : input not UTF-8\n"); + if (doc != NULL) + doc->encoding = + xmlStrdup(BAD_CAST "ISO-8859-1"); + snprintf(tmp, sizeof(tmp), "&#%d;", *cur); + tmp[sizeof(tmp) - 1] = 0; + xmlBufferAdd(buf, (xmlChar *) tmp, -1); + cur++; + base = cur; + continue; + } else if (*cur < 0xE0) { + val = (cur[0]) & 0x1F; + val <<= 6; + val |= (cur[1]) & 0x3F; + l = 2; + } else if (*cur < 0xF0) { + val = (cur[0]) & 0x0F; + val <<= 6; + val |= (cur[1]) & 0x3F; + val <<= 6; + val |= (cur[2]) & 0x3F; + l = 3; + } else if (*cur < 0xF8) { + val = (cur[0]) & 0x07; + val <<= 6; + val |= (cur[1]) & 0x3F; + val <<= 6; + val |= (cur[2]) & 0x3F; + val <<= 6; + val |= (cur[3]) & 0x3F; + l = 4; + } + if ((l == 1) || (!IS_CHAR(val))) { + xmlGenericError(xmlGenericErrorContext, + "xmlAttrSerializeContent : char out of range\n"); + if (doc != NULL) + doc->encoding = + xmlStrdup(BAD_CAST "ISO-8859-1"); + snprintf(tmp, sizeof(tmp), 
"&#%d;", *cur); + tmp[sizeof(tmp) - 1] = 0; + xmlBufferAdd(buf, (xmlChar *) tmp, -1); + cur++; + base = cur; + continue; + } + /* + * We could do multiple things here. Just save + * as a char ref + */ + snprintf(tmp, sizeof(tmp), "&#x%X;", val); + tmp[sizeof(tmp) - 1] = 0; + xmlBufferAdd(buf, (xmlChar *) tmp, -1); + cur += l; + base = cur; + } else { + cur++; + } + } + if (base != cur) + xmlBufferAdd(buf, base, cur - base); + break; + case XML_ENTITY_REF_NODE: + xmlBufferAdd(buf, BAD_CAST "&", 1); + xmlBufferAdd(buf, children->name, + xmlStrlen(children->name)); + xmlBufferAdd(buf, BAD_CAST ";", 1); + break; + default: + /* should not happen unless we have a badly built tree */ + break; + } + children = children->next; + } +} + +/** + * xmlNodeDump: + * @buf: the XML buffer output + * @doc: the document + * @cur: the current node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * + * Dump an XML node, recursive behaviour,children are printed too. + * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + * + * Returns the number of bytes written to the buffer or -1 in case of error + */ +int +xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level, + int format) +{ + unsigned int use; + int ret; + xmlOutputBufferPtr outbuf; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeDump : node == NULL\n"); +#endif + return (-1); + } + if (buf == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeDump : buf == NULL\n"); +#endif + return (-1); + } + outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); + if (outbuf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNodeDump: out of memory!\n"); + return (-1); + } + memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); + outbuf->buffer = buf; + outbuf->encoder = NULL; + outbuf->writecallback = NULL; + outbuf->closecallback 
= NULL; + outbuf->context = NULL; + outbuf->written = 0; + + use = buf->use; + xmlNodeDumpOutput(outbuf, doc, cur, level, format, NULL); + xmlFree(outbuf); + ret = buf->use - use; + return (ret); +} + +/** + * xmlElemDump: + * @f: the FILE * for the output + * @doc: the document + * @cur: the current node + * + * Dump an XML/HTML node, recursive behaviour, children are printed too. + */ +void +xmlElemDump(FILE * f, xmlDocPtr doc, xmlNodePtr cur) +{ + xmlOutputBufferPtr outbuf; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlElemDump : cur == NULL\n"); +#endif + return; + } +#ifdef DEBUG_TREE + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlElemDump : doc == NULL\n"); + } +#endif + + outbuf = xmlOutputBufferCreateFile(f, NULL); + if (outbuf == NULL) + return; + if ((doc != NULL) && (doc->type == XML_HTML_DOCUMENT_NODE)) { +#ifdef LIBXML_HTML_ENABLED + htmlNodeDumpOutput(outbuf, doc, cur, NULL); +#else + xmlGenericError(xmlGenericErrorContext, + "HTML support not compiled in\n"); +#endif /* LIBXML_HTML_ENABLED */ + } else + xmlNodeDumpOutput(outbuf, doc, cur, 0, 1, NULL); + xmlOutputBufferClose(outbuf); +} + +/************************************************************************ + * * + * Dumping XML tree content to an I/O output buffer * + * * + ************************************************************************/ + +static void +xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding); +static void +xmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding); +static void +xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, int level, int format, const char *encoding); + +/** + * xmlNsDumpOutput: + * @buf: the XML buffer output + * @cur: a namespace + * + * Dump a local Namespace definition. 
+ * Should be called in the context of attributes dumps. + */ +static void +xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNsDumpOutput : Ns == NULL\n"); +#endif + return; + } + if ((cur->type == XML_LOCAL_NAMESPACE) && (cur->href != NULL)) { + if (xmlStrEqual(cur->prefix, BAD_CAST "xml")) + return; + + /* Within the context of an element attributes */ + if (cur->prefix != NULL) { + xmlOutputBufferWriteString(buf, " xmlns:"); + xmlOutputBufferWriteString(buf, (const char *)cur->prefix); + } else + xmlOutputBufferWriteString(buf, " xmlns"); + xmlOutputBufferWriteString(buf, "="); + xmlBufferWriteQuotedString(buf->buffer, cur->href); + } +} + +/** + * xmlNsListDumpOutput: + * @buf: the XML buffer output + * @cur: the first namespace + * + * Dump a list of local Namespace definitions. + * Should be called in the context of attributes dumps. + */ +static void +xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { + while (cur != NULL) { + xmlNsDumpOutput(buf, cur); + cur = cur->next; + } +} + +/** + * xmlDtdDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @encoding: an optional encoding string + * + * Dump the XML document DTD, if any. 
+ */ +static void +xmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDtdPtr dtd, const char *encoding) { + if (dtd == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlDtdDumpOutput : no internal subset\n"); +#endif + return; + } + xmlOutputBufferWriteString(buf, "<!DOCTYPE "); + xmlOutputBufferWriteString(buf, (const char *)dtd->name); + if (dtd->ExternalID != NULL) { + xmlOutputBufferWriteString(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf->buffer, dtd->ExternalID); + xmlOutputBufferWriteString(buf, " "); + xmlBufferWriteQuotedString(buf->buffer, dtd->SystemID); + } else if (dtd->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf->buffer, dtd->SystemID); + } + if ((dtd->entities == NULL) && (dtd->elements == NULL) && + (dtd->attributes == NULL) && (dtd->notations == NULL)) { + xmlOutputBufferWriteString(buf, ">"); + return; + } + xmlOutputBufferWriteString(buf, " [\n"); + xmlNodeListDumpOutput(buf, dtd->doc, dtd->children, -1, 0, encoding); + xmlOutputBufferWriteString(buf, "]>"); +} + +/** + * xmlAttrDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the attribute pointer + * @encoding: an optional encoding string + * + * Dump an XML attribute + */ +static void +xmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, + const char *encoding ATTRIBUTE_UNUSED) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAttrDumpOutput : property == NULL\n"); +#endif + return; + } + xmlOutputBufferWriteString(buf, " "); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, "=\""); + xmlAttrSerializeContent(buf->buffer, doc, cur); + xmlOutputBufferWriteString(buf, "\""); +} + +/** + * xmlAttrListDumpOutput: + * @buf: the 
XML buffer output + * @doc: the document + * @cur: the first attribute pointer + * @encoding: an optional encoding string + * + * Dump a list of XML attributes + */ +static void +xmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlAttrPtr cur, const char *encoding) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAttrListDumpOutput : property == NULL\n"); +#endif + return; + } + while (cur != NULL) { + xmlAttrDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } +} + + + +/** + * xmlNodeListDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the first node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XML node list, recursive behaviour, children are printed too. + * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +static void +xmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, int level, int format, const char *encoding) { + int i; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeListDumpOutput : node == NULL\n"); +#endif + return; + } + while (cur != NULL) { + if ((format) && (xmlIndentTreeOutput) && + (cur->type == XML_ELEMENT_NODE)) + for (i = 0;i < level;i++) + xmlOutputBufferWriteString(buf, xmlTreeIndentString); + xmlNodeDumpOutputInternal(buf, doc, cur, level, format, encoding); + if (format) { + xmlOutputBufferWriteString(buf, "\n"); + } + cur = cur->next; + } +} + +/** + * xmlNodeDumpOutputInternal: + * @buf: the XML buffer output + * @doc: the document + * @cur: the current node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XML node, recursive behaviour, children are printed too. 
+ * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +static void +xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, int level, int format, const char *encoding) { + int i; + xmlNodePtr tmp; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeDumpOutput : node == NULL\n"); +#endif + return; + } + if (cur->type == XML_XINCLUDE_START) + return; + if (cur->type == XML_XINCLUDE_END) + return; + if (cur->type == XML_DTD_NODE) { + xmlDtdDumpOutput(buf, (xmlDtdPtr) cur, encoding); + return; + } + if (cur->type == XML_ELEMENT_DECL) { + xmlDumpElementDecl(buf->buffer, (xmlElementPtr) cur); + return; + } + if (cur->type == XML_ATTRIBUTE_DECL) { + xmlDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); + return; + } + if (cur->type == XML_ENTITY_DECL) { + xmlDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); + return; + } + if (cur->type == XML_TEXT_NODE) { + if (cur->content != NULL) { + if ((cur->name == xmlStringText) || + (cur->name != xmlStringTextNoenc)) { + xmlChar *buffer; + + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + else + buffer = xmlEncodeSpecialChars(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + /* + * Disable escaping, needed for XSLT + */ + xmlOutputBufferWriteString(buf, (const char *) cur->content); + } + } + + return; + } + if (cur->type == XML_PI_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "<?"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + xmlOutputBufferWriteString(buf, "?>"); + } else { + xmlOutputBufferWriteString(buf, "<?"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + 
xmlOutputBufferWriteString(buf, "?>"); + } + return; + } + if (cur->type == XML_COMMENT_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "<!--"); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + xmlOutputBufferWriteString(buf, "-->"); + } + return; + } + if (cur->type == XML_ENTITY_REF_NODE) { + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + return; + } + if (cur->type == XML_CDATA_SECTION_NODE) { + xmlOutputBufferWriteString(buf, "<![CDATA["); + if (cur->content != NULL) + xmlOutputBufferWriteString(buf, (const char *)cur->content); + xmlOutputBufferWriteString(buf, "]]>"); + return; + } + + if (format == 1) { + tmp = cur->children; + while (tmp != NULL) { + if ((tmp->type == XML_TEXT_NODE) || + (tmp->type == XML_ENTITY_REF_NODE)) { + format = 0; + break; + } + tmp = tmp->next; + } + } + xmlOutputBufferWriteString(buf, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutput(buf, cur->nsDef); + if (cur->properties != NULL) + xmlAttrListDumpOutput(buf, doc, cur->properties, encoding); + + if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && + (cur->children == NULL) && (!xmlSaveNoEmptyTags)) { + xmlOutputBufferWriteString(buf, "/>"); + return; + } + xmlOutputBufferWriteString(buf, ">"); + if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) { + xmlChar *buffer; + + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + else + buffer = xmlEncodeSpecialChars(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } + if (cur->children != NULL) { + if (format) xmlOutputBufferWriteString(buf, "\n"); + 
xmlNodeListDumpOutput(buf, doc, cur->children, + (level >= 0?level+1:-1), format, encoding); + if ((xmlIndentTreeOutput) && (format)) + for (i = 0;i < level;i++) + xmlOutputBufferWriteString(buf, xmlTreeIndentString); + } + xmlOutputBufferWriteString(buf, "</"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); +} + +/** + * xmlNodeDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the current node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XML node, recursive behaviour, children are printed too. + * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +void +xmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding) +{ +#ifdef LIBXML_HTML_ENABLED + xmlDtdPtr dtd; + int is_xhtml = 0; + + dtd = xmlGetIntSubset(doc); + if (dtd != NULL) { + is_xhtml = xmlIsXHTML(dtd->SystemID, dtd->ExternalID); + if (is_xhtml < 0) + is_xhtml = 0; + if ((is_xhtml) && (cur->parent == (xmlNodePtr) doc) && + (cur->type == XML_ELEMENT_NODE) && + (xmlStrEqual(cur->name, BAD_CAST "html"))) { + if (encoding != NULL) + htmlSetMetaEncoding((htmlDocPtr) cur, + (const xmlChar *) encoding); + else + htmlSetMetaEncoding((htmlDocPtr) cur, BAD_CAST "UTF-8"); + } + } + + if (is_xhtml) + xhtmlNodeDumpOutput(buf, doc, cur, level, format, encoding); + else +#endif + xmlNodeDumpOutputInternal(buf, doc, cur, level, format, encoding); +} + +/** + * xmlDocContentDumpOutput: + * @buf: the XML buffer output + * @cur: the document + * @encoding: an optional encoding string + * @format: should formatting spaces been added + * + * Dump an XML 
document. + * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +static void +xmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding, int format) { +#ifdef LIBXML_HTML_ENABLED + xmlDtdPtr dtd; + int is_xhtml = 0; +#endif + + xmlOutputBufferWriteString(buf, "<?xml version="); + if (cur->version != NULL) + xmlBufferWriteQuotedString(buf->buffer, cur->version); + else + xmlOutputBufferWriteString(buf, "\"1.0\""); + if (encoding == NULL) { + if (cur->encoding != NULL) + encoding = (const char *) cur->encoding; + else if (cur->charset != XML_CHAR_ENCODING_UTF8) + encoding = xmlGetCharEncodingName((xmlCharEncoding) cur->charset); + } + if (encoding != NULL) { + xmlOutputBufferWriteString(buf, " encoding="); + xmlBufferWriteQuotedString(buf->buffer, (xmlChar *) encoding); + } + switch (cur->standalone) { + case 0: + xmlOutputBufferWriteString(buf, " standalone=\"no\""); + break; + case 1: + xmlOutputBufferWriteString(buf, " standalone=\"yes\""); + break; + } + xmlOutputBufferWriteString(buf, "?>\n"); + +#ifdef LIBXML_HTML_ENABLED + dtd = xmlGetIntSubset(cur); + if (dtd != NULL) { + is_xhtml = xmlIsXHTML(dtd->SystemID, dtd->ExternalID); + if (is_xhtml < 0) is_xhtml = 0; + } + if (is_xhtml) { + if (encoding != NULL) + htmlSetMetaEncoding(cur, (const xmlChar *) encoding); + else + htmlSetMetaEncoding(cur, BAD_CAST "UTF-8"); + } +#endif + if (cur->children != NULL) { + xmlNodePtr child = cur->children; + + while (child != NULL) { +#ifdef LIBXML_HTML_ENABLED + if (is_xhtml) + xhtmlNodeDumpOutput(buf, cur, child, 0, format, encoding); + else +#endif + xmlNodeDumpOutputInternal(buf, cur, child, 0, format, encoding); + xmlOutputBufferWriteString(buf, "\n"); + child = child->next; + } + } +} + +#ifdef LIBXML_HTML_ENABLED +/************************************************************************ + * * + * Functions specific to XHTML serialization * + * * + 
************************************************************************/ + +#define XHTML_STRICT_PUBLIC_ID BAD_CAST \ + "-//W3C//DTD XHTML 1.0 Strict//EN" +#define XHTML_STRICT_SYSTEM_ID BAD_CAST \ + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" +#define XHTML_FRAME_PUBLIC_ID BAD_CAST \ + "-//W3C//DTD XHTML 1.0 Frameset//EN" +#define XHTML_FRAME_SYSTEM_ID BAD_CAST \ + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" +#define XHTML_TRANS_PUBLIC_ID BAD_CAST \ + "-//W3C//DTD XHTML 1.0 Transitional//EN" +#define XHTML_TRANS_SYSTEM_ID BAD_CAST \ + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" + +#define XHTML_NS_NAME BAD_CAST "http://www.w3.org/1999/xhtml" +/** + * xmlIsXHTML: + * @systemID: the system identifier + * @publicID: the public identifier + * + * Try to find if the document correspond to an XHTML DTD + * + * Returns 1 if true, 0 if not and -1 in case of error + */ +int +xmlIsXHTML(const xmlChar *systemID, const xmlChar *publicID) { + if ((systemID == NULL) && (publicID == NULL)) + return(-1); + if (publicID != NULL) { + if (xmlStrEqual(publicID, XHTML_STRICT_PUBLIC_ID)) return(1); + if (xmlStrEqual(publicID, XHTML_FRAME_PUBLIC_ID)) return(1); + if (xmlStrEqual(publicID, XHTML_TRANS_PUBLIC_ID)) return(1); + } + if (systemID != NULL) { + if (xmlStrEqual(systemID, XHTML_STRICT_SYSTEM_ID)) return(1); + if (xmlStrEqual(systemID, XHTML_FRAME_SYSTEM_ID)) return(1); + if (xmlStrEqual(systemID, XHTML_TRANS_SYSTEM_ID)) return(1); + } + return(0); +} + +/** + * xhtmlIsEmpty: + * @node: the node + * + * Check if a node is an empty xhtml node + * + * Returns 1 if the node is an empty node, 0 if not and -1 in case of error + */ +static int +xhtmlIsEmpty(xmlNodePtr node) { + if (node == NULL) + return(-1); + if (node->type != XML_ELEMENT_NODE) + return(0); + if ((node->ns != NULL) && (!xmlStrEqual(node->ns->href, XHTML_NS_NAME))) + return(0); + if (node->children != NULL) + return(0); + switch (node->name[0]) { + case 'a': + if 
(xmlStrEqual(node->name, BAD_CAST "area")) + return(1); + return(0); + case 'b': + if (xmlStrEqual(node->name, BAD_CAST "br")) + return(1); + if (xmlStrEqual(node->name, BAD_CAST "base")) + return(1); + if (xmlStrEqual(node->name, BAD_CAST "basefont")) + return(1); + return(0); + case 'c': + if (xmlStrEqual(node->name, BAD_CAST "col")) + return(1); + return(0); + case 'f': + if (xmlStrEqual(node->name, BAD_CAST "frame")) + return(1); + return(0); + case 'h': + if (xmlStrEqual(node->name, BAD_CAST "hr")) + return(1); + return(0); + case 'i': + if (xmlStrEqual(node->name, BAD_CAST "img")) + return(1); + if (xmlStrEqual(node->name, BAD_CAST "input")) + return(1); + if (xmlStrEqual(node->name, BAD_CAST "isindex")) + return(1); + return(0); + case 'l': + if (xmlStrEqual(node->name, BAD_CAST "link")) + return(1); + return(0); + case 'm': + if (xmlStrEqual(node->name, BAD_CAST "meta")) + return(1); + return(0); + case 'p': + if (xmlStrEqual(node->name, BAD_CAST "param")) + return(1); + return(0); + } + return(0); +} + +/** + * xhtmlAttrListDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the first attribute pointer + * @encoding: an optional encoding string + * + * Dump a list of XML attributes + */ +static void +xhtmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlAttrPtr cur, const char *encoding) { + xmlAttrPtr xml_lang = NULL; + xmlAttrPtr lang = NULL; + xmlAttrPtr name = NULL; + xmlAttrPtr id = NULL; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAttrListDumpOutput : property == NULL\n"); +#endif + return; + } + while (cur != NULL) { + if ((cur->ns == NULL) && (xmlStrEqual(cur->name, BAD_CAST "id"))) + id = cur; + else + if ((cur->ns == NULL) && (xmlStrEqual(cur->name, BAD_CAST "name"))) + name = cur; + else + if ((cur->ns == NULL) && (xmlStrEqual(cur->name, BAD_CAST "lang"))) + lang = cur; + else + if ((cur->ns != NULL) && (xmlStrEqual(cur->name, BAD_CAST "lang")) && + 
(xmlStrEqual(cur->ns->prefix, BAD_CAST "xml"))) + xml_lang = cur; + else if ((cur->ns == NULL) && + ((cur->children == NULL) || + (cur->children->content == NULL) || + (cur->children->content[0] == 0)) && + (htmlIsBooleanAttr(cur->name))) { + if (cur->children != NULL) + xmlFreeNode(cur->children); + cur->children = xmlNewText(cur->name); + if (cur->children != NULL) + cur->children->parent = (xmlNodePtr) cur; + } + xmlAttrDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } + /* + * C.8 + */ + if ((name != NULL) && (id == NULL)) { + xmlOutputBufferWriteString(buf, " id=\""); + xmlAttrSerializeContent(buf->buffer, doc, name); + xmlOutputBufferWriteString(buf, "\""); + } + /* + * C.7. + */ + if ((lang != NULL) && (xml_lang == NULL)) { + xmlOutputBufferWriteString(buf, " xml:lang=\""); + xmlAttrSerializeContent(buf->buffer, doc, lang); + xmlOutputBufferWriteString(buf, "\""); + } else + if ((xml_lang != NULL) && (lang == NULL)) { + xmlOutputBufferWriteString(buf, " lang=\""); + xmlAttrSerializeContent(buf->buffer, doc, xml_lang); + xmlOutputBufferWriteString(buf, "\""); + } +} + +/** + * xhtmlNodeListDumpOutput: + * @buf: the XML buffer output + * @doc: the XHTML document + * @cur: the first node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XML node list, recursive behaviour, children are printed too. 
+ * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +static void +xhtmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, int level, int format, const char *encoding) { + int i; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xhtmlNodeListDumpOutput : node == NULL\n"); +#endif + return; + } + while (cur != NULL) { + if ((format) && (xmlIndentTreeOutput) && + (cur->type == XML_ELEMENT_NODE)) + for (i = 0;i < level;i++) + xmlOutputBufferWriteString(buf, xmlTreeIndentString); + xhtmlNodeDumpOutput(buf, doc, cur, level, format, encoding); + if (format) { + xmlOutputBufferWriteString(buf, "\n"); + } + cur = cur->next; + } +} + +/** + * xhtmlNodeDumpOutput: + * @buf: the XML buffer output + * @doc: the XHTML document + * @cur: the current node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XHTML node, recursive behaviour, children are printed too. 
+ * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +static void +xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding) { + int i; + xmlNodePtr tmp; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeDumpOutput : node == NULL\n"); +#endif + return; + } + if (cur->type == XML_XINCLUDE_START) + return; + if (cur->type == XML_XINCLUDE_END) + return; + if (cur->type == XML_DTD_NODE) { + xmlDtdDumpOutput(buf, (xmlDtdPtr) cur, encoding); + return; + } + if (cur->type == XML_ELEMENT_DECL) { + xmlDumpElementDecl(buf->buffer, (xmlElementPtr) cur); + return; + } + if (cur->type == XML_ATTRIBUTE_DECL) { + xmlDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); + return; + } + if (cur->type == XML_ENTITY_DECL) { + xmlDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); + return; + } + if (cur->type == XML_TEXT_NODE) { + if (cur->content != NULL) { + if ((cur->name == xmlStringText) || + (cur->name != xmlStringTextNoenc)) { + xmlChar *buffer; + + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + else + buffer = xmlEncodeSpecialChars(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + /* + * Disable escaping, needed for XSLT + */ + xmlOutputBufferWriteString(buf, (const char *) cur->content); + } + } + + return; + } + if (cur->type == XML_PI_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "<?"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + xmlOutputBufferWriteString(buf, "?>"); + } else { + xmlOutputBufferWriteString(buf, "<?"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + 
xmlOutputBufferWriteString(buf, "?>"); + } + return; + } + if (cur->type == XML_COMMENT_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "<!--"); + xmlOutputBufferWriteString(buf, (const char *)cur->content); + xmlOutputBufferWriteString(buf, "-->"); + } + return; + } + if (cur->type == XML_ENTITY_REF_NODE) { + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + return; + } + if (cur->type == XML_CDATA_SECTION_NODE) { + xmlOutputBufferWriteString(buf, "<![CDATA["); + if (cur->content != NULL) + xmlOutputBufferWriteString(buf, (const char *)cur->content); + xmlOutputBufferWriteString(buf, "]]>"); + return; + } + + if (format == 1) { + tmp = cur->children; + while (tmp != NULL) { + if ((tmp->type == XML_TEXT_NODE) || + (tmp->type == XML_ENTITY_REF_NODE)) { + format = 0; + break; + } + tmp = tmp->next; + } + } + xmlOutputBufferWriteString(buf, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutput(buf, cur->nsDef); + if ((xmlStrEqual(cur->name, BAD_CAST "html") && + (cur->ns == NULL) && (cur->nsDef == NULL))) { + /* + * 3.1.1. Strictly Conforming Documents A.3.1.1 3/ + */ + xmlOutputBufferWriteString(buf, + " xmlns=\"http://www.w3.org/1999/xhtml\""); + } + if (cur->properties != NULL) + xhtmlAttrListDumpOutput(buf, doc, cur->properties, encoding); + + if ((cur->type == XML_ELEMENT_NODE) && (cur->children == NULL)) { + if (((cur->ns == NULL) || (cur->ns->prefix == NULL)) && + (xhtmlIsEmpty(cur) == 1)) { + /* + * C.2. Empty Elements + */ + xmlOutputBufferWriteString(buf, " />"); + } else { + /* + * C.3. 
Element Minimization and Empty Element Content + */ + xmlOutputBufferWriteString(buf, "></"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + } + return; + } + xmlOutputBufferWriteString(buf, ">"); + if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) { + xmlChar *buffer; + + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + else + buffer = xmlEncodeSpecialChars(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } + + /* + * 4.8. Script and Style elements + */ + if ((cur->type == XML_ELEMENT_NODE) && + ((xmlStrEqual(cur->name, BAD_CAST "script")) || + (xmlStrEqual(cur->name, BAD_CAST "style"))) && + ((cur->ns == NULL) || + (xmlStrEqual(cur->ns->href, XHTML_NS_NAME)))) { + xmlNodePtr child = cur->children; + + while (child != NULL) { + if ((child->type == XML_TEXT_NODE) || + (child->type == XML_CDATA_SECTION_NODE)) { + /* + * Apparently CDATA escaping for style just break on IE, + * mozilla and galeon, so ... 
+ */ + if (xmlStrEqual(cur->name, BAD_CAST "style") && + (xmlStrchr(child->content, '<') == NULL) && + (xmlStrchr(child->content, '>') == NULL) && + (xmlStrchr(child->content, '&') == NULL)) { + xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding); + } else { + xmlOutputBufferWriteString(buf, "<![CDATA["); + if (child->content != NULL) + xmlOutputBufferWriteString(buf, + (const char *)child->content); + xmlOutputBufferWriteString(buf, "]]>"); + } + } else { + xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding); + } + child = child->next; + } + } else if (cur->children != NULL) { + if (format) xmlOutputBufferWriteString(buf, "\n"); + xhtmlNodeListDumpOutput(buf, doc, cur->children, + (level >= 0?level+1:-1), format, encoding); + if ((xmlIndentTreeOutput) && (format)) + for (i = 0;i < level;i++) + xmlOutputBufferWriteString(buf, xmlTreeIndentString); + } + xmlOutputBufferWriteString(buf, "</"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); +} +#endif + +/************************************************************************ + * * + * Saving functions front-ends * + * * + ************************************************************************/ + +/** + * xmlDocDumpFormatMemoryEnc: + * @out_doc: Document to generate XML text from + * @doc_txt_ptr: Memory pointer for allocated XML text + * @doc_txt_len: Length of the generated XML text + * @txt_encoding: Character encoding to use when generating XML text + * @format: should formatting spaces been added + * + * Dump the current DOM tree into memory using the character encoding specified + * by the caller. Note it is up to the caller of this function to free the + * allocated memory with xmlFree(). 
+ * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ + +void +xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr, + int * doc_txt_len, const char * txt_encoding, + int format) { + int dummy = 0; + + xmlCharEncoding doc_charset; + xmlOutputBufferPtr out_buff = NULL; + xmlCharEncodingHandlerPtr conv_hdlr = NULL; + + if (doc_txt_len == NULL) { + doc_txt_len = &dummy; /* Continue, caller just won't get length */ + } + + if (doc_txt_ptr == NULL) { + *doc_txt_len = 0; + xmlGenericError(xmlGenericErrorContext, + "xmlDocDumpFormatMemoryEnc: Null return buffer pointer."); + return; + } + + *doc_txt_ptr = NULL; + *doc_txt_len = 0; + + if (out_doc == NULL) { + /* No document, no output */ + xmlGenericError(xmlGenericErrorContext, + "xmlDocDumpFormatMemoryEnc: Null DOM tree document pointer.\n"); + return; + } + + /* + * Validate the encoding value, if provided. + * This logic is copied from xmlSaveFileEnc. + */ + + if (txt_encoding == NULL) + txt_encoding = (const char *) out_doc->encoding; + if (txt_encoding != NULL) { + doc_charset = xmlParseCharEncoding(txt_encoding); + + if (out_doc->charset != XML_CHAR_ENCODING_UTF8) { + xmlGenericError(xmlGenericErrorContext, + "xmlDocDumpFormatMemoryEnc: Source document not in UTF8\n"); + return; + + } else if (doc_charset != XML_CHAR_ENCODING_UTF8) { + conv_hdlr = xmlFindCharEncodingHandler(txt_encoding); + if ( conv_hdlr == NULL ) { + xmlGenericError(xmlGenericErrorContext, + "%s: %s %s '%s'\n", + "xmlDocDumpFormatMemoryEnc", + "Failed to identify encoding handler for", + "character set", + txt_encoding); + return; + } + } + } + + if ((out_buff = xmlAllocOutputBuffer(conv_hdlr)) == NULL ) { + xmlGenericError(xmlGenericErrorContext, + "xmlDocDumpFormatMemoryEnc: Failed to allocate output buffer.\n"); + return; + } + + xmlDocContentDumpOutput(out_buff, out_doc, txt_encoding, format); + xmlOutputBufferFlush(out_buff); + if (out_buff->conv != NULL) 
{ + *doc_txt_len = out_buff->conv->use; + *doc_txt_ptr = xmlStrndup(out_buff->conv->content, *doc_txt_len); + } else { + *doc_txt_len = out_buff->buffer->use; + *doc_txt_ptr = xmlStrndup(out_buff->buffer->content, *doc_txt_len); + } + (void)xmlOutputBufferClose(out_buff); + + if ((*doc_txt_ptr == NULL) && (*doc_txt_len > 0)) { + *doc_txt_len = 0; + xmlGenericError(xmlGenericErrorContext, + "xmlDocDumpFormatMemoryEnc: %s\n", + "Failed to allocate memory for document text representation."); + } + + return; +} + +/** + * xmlDocDumpMemory: + * @cur: the document + * @mem: OUT: the memory pointer + * @size: OUT: the memory length + * + * Dump an XML document in memory and return the #xmlChar * and it's size. + * It's up to the caller to free the memory with xmlFree(). + */ +void +xmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { + xmlDocDumpFormatMemoryEnc(cur, mem, size, NULL, 0); +} + +/** + * xmlDocDumpFormatMemory: + * @cur: the document + * @mem: OUT: the memory pointer + * @size: OUT: the memory length + * @format: should formatting spaces been added + * + * + * Dump an XML document in memory and return the #xmlChar * and it's size. + * It's up to the caller to free the memory with xmlFree(). + * Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 + * or xmlKeepBlanksDefault(0) was called + */ +void +xmlDocDumpFormatMemory(xmlDocPtr cur, xmlChar**mem, int *size, int format) { + xmlDocDumpFormatMemoryEnc(cur, mem, size, NULL, format); +} + +/** + * xmlDocDumpMemoryEnc: + * @out_doc: Document to generate XML text from + * @doc_txt_ptr: Memory pointer for allocated XML text + * @doc_txt_len: Length of the generated XML text + * @txt_encoding: Character encoding to use when generating XML text + * + * Dump the current DOM tree into memory using the character encoding specified + * by the caller. Note it is up to the caller of this function to free the + * allocated memory with xmlFree(). 
+ */ + +void +xmlDocDumpMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr, + int * doc_txt_len, const char * txt_encoding) { + xmlDocDumpFormatMemoryEnc(out_doc, doc_txt_ptr, doc_txt_len, + txt_encoding, 0); +} + +/** + * xmlGetDocCompressMode: + * @doc: the document + * + * get the compression ratio for a document, ZLIB based + * Returns 0 (uncompressed) to 9 (max compression) + */ +int +xmlGetDocCompressMode (xmlDocPtr doc) { + if (doc == NULL) return(-1); + return(doc->compression); +} + +/** + * xmlSetDocCompressMode: + * @doc: the document + * @mode: the compression ratio + * + * set the compression ratio for a document, ZLIB based + * Correct values: 0 (uncompressed) to 9 (max compression) + */ +void +xmlSetDocCompressMode (xmlDocPtr doc, int mode) { + if (doc == NULL) return; + if (mode < 0) doc->compression = 0; + else if (mode > 9) doc->compression = 9; + else doc->compression = mode; +} + +/** + * xmlGetCompressMode: + * + * get the default compression mode used, ZLIB based. + * Returns 0 (uncompressed) to 9 (max compression) + */ +int +xmlGetCompressMode(void) +{ + return (xmlCompressMode); +} + +/** + * xmlSetCompressMode: + * @mode: the compression ratio + * + * set the default compression mode used, ZLIB based + * Correct values: 0 (uncompressed) to 9 (max compression) + */ +void +xmlSetCompressMode(int mode) { + if (mode < 0) xmlCompressMode = 0; + else if (mode > 9) xmlCompressMode = 9; + else xmlCompressMode = mode; +} + +/** + * xmlDocFormatDump: + * @f: the FILE* + * @cur: the document + * @format: should formatting spaces been added + * + * Dump an XML document to an open FILE. + * + * returns: the number of bytes written or -1 in case of failure. 
+ */ +int +xmlDocFormatDump(FILE *f, xmlDocPtr cur, int format) { + xmlOutputBufferPtr buf; + const char * encoding; + xmlCharEncodingHandlerPtr handler = NULL; + int ret; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlDocDump : document == NULL\n"); +#endif + return(-1); + } + encoding = (const char *) cur->encoding; + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + xmlGenericError(xmlGenericErrorContext, + "xmlDocDump: document not in UTF8\n"); + return(-1); + } + if (enc != XML_CHAR_ENCODING_UTF8) { + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) { + xmlFree((char *) cur->encoding); + cur->encoding = NULL; + } + } + } + buf = xmlOutputBufferCreateFile(f, handler); + if (buf == NULL) return(-1); + xmlDocContentDumpOutput(buf, cur, NULL, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * xmlDocDump: + * @f: the FILE* + * @cur: the document + * + * Dump an XML document to an open FILE. + * + * returns: the number of bytes written or -1 in case of failure. + */ +int +xmlDocDump(FILE *f, xmlDocPtr cur) { + return(xmlDocFormatDump (f, cur, 0)); +} + +/** + * xmlSaveFileTo: + * @buf: an output I/O buffer + * @cur: the document + * @encoding: the encoding if any assuming the I/O layer handles the trancoding + * + * Dump an XML document to an I/O buffer. + * + * returns: the number of bytes written or -1 in case of failure. 
+ */ +int +xmlSaveFileTo(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) { + int ret; + + if (buf == NULL) return(0); + xmlDocContentDumpOutput(buf, cur, encoding, 0); + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * xmlSaveFormatFileTo: + * @buf: an output I/O buffer + * @cur: the document + * @encoding: the encoding if any assuming the I/O layer handles the trancoding + * @format: should formatting spaces been added + * + * Dump an XML document to an I/O buffer. + * + * returns: the number of bytes written or -1 in case of failure. + */ +int +xmlSaveFormatFileTo(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding, int format) { + int ret; + + if (buf == NULL) return(0); + xmlDocContentDumpOutput(buf, cur, encoding, format); + ret = xmlOutputBufferClose(buf); + return(ret); +} + +/** + * xmlSaveFormatFileEnc: + * @filename: the filename or URL to output + * @cur: the document being saved + * @encoding: the name of the encoding to use or NULL. + * @format: should formatting spaces be added. + * + * Dump an XML document to a file or an URL. + * + * Returns the number of bytes written or -1 in case of error. + */ +int +xmlSaveFormatFileEnc( const char * filename, xmlDocPtr cur, + const char * encoding, int format ) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + xmlCharEncoding enc; + int ret; + + if (encoding == NULL) + encoding = (const char *) cur->encoding; + + if (encoding != NULL) { + + enc = xmlParseCharEncoding(encoding); + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveFormatFileEnc: document not in UTF8\n"); + return(-1); + } + if (enc != XML_CHAR_ENCODING_UTF8) { + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + return(-1); + } + } + +#ifdef HAVE_ZLIB_H + if (cur->compression < 0) cur->compression = xmlCompressMode; +#endif + /* + * save the content to a temp buffer. 
+ */ + buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); + if (buf == NULL) return(-1); + + xmlDocContentDumpOutput(buf, cur, encoding, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +} + + +/** + * xmlSaveFileEnc: + * @filename: the filename (or URL) + * @cur: the document + * @encoding: the name of an encoding (or NULL) + * + * Dump an XML document, converting it to the given encoding + * + * returns: the number of bytes written or -1 in case of failure. + */ +int +xmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { + return ( xmlSaveFormatFileEnc( filename, cur, encoding, 0 ) ); +} + +/** + * xmlSaveFormatFile: + * @filename: the filename (or URL) + * @cur: the document + * @format: should formatting spaces been added + * + * Dump an XML document to a file. Will use compression if + * compiled in and enabled. If @filename is "-" the stdout file is + * used. If @format is set then the document will be indented on output. + * + * returns: the number of bytes written or -1 in case of failure. + */ +int +xmlSaveFormatFile(const char *filename, xmlDocPtr cur, int format) { + return ( xmlSaveFormatFileEnc( filename, cur, NULL, format ) ); +} + +/** + * xmlSaveFile: + * @filename: the filename (or URL) + * @cur: the document + * + * Dump an XML document to a file. Will use compression if + * compiled in and enabled. If @filename is "-" the stdout file is + * used. + * returns: the number of bytes written or -1 in case of failure. + */ +int +xmlSaveFile(const char *filename, xmlDocPtr cur) { + return(xmlSaveFormatFileEnc(filename, cur, NULL, 0)); +} + diff --git a/bundle/libxml/trio.c b/bundle/libxml/trio.c new file mode 100644 index 0000000000..7593e58180 --- /dev/null +++ b/bundle/libxml/trio.c @@ -0,0 +1,6760 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 1998 Bjorn Reese and Daniel Stenberg. 
+ * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + ************************************************************************* + * + * A note to trio contributors: + * + * Avoid heap allocation at all costs to ensure that the trio functions + * are async-safe. The exceptions are the printf/fprintf functions, which + * use fputc, and the asprintf functions and the <alloc> modifier, which + * by design are required to allocate from the heap. + * + ************************************************************************/ + +/* + * TODO: + * - Scan is probably too permissive about its modifiers. + * - C escapes in %#[] ? + * - Multibyte characters (done for format parsing, except scan groups) + * - Complex numbers? (C99 _Complex) + * - Boolean values? (C99 _Bool) + * - C99 NaN(n-char-sequence) missing + * - Should we support the GNU %a alloc modifier? GNU has an ugly hack + * for %a, because C99 used %a for other purposes. If specified as + * %as or %a[ it is interpreted as the alloc modifier, otherwise as + * the C99 hex-float. This means that you cannot scan %as as a hex-float + * immediately followed by an 's'. + * - Scanning of collating symbols.
+ */ + +/************************************************************************* + * Trio include files + */ +#include "triodef.h" +#include "trio.h" +#include "triop.h" +#include "trionan.h" +#if !defined(TRIO_MINIMAL) +# include "triostr.h" +#endif + +/************************************************************************** + * + * Definitions + * + *************************************************************************/ + +#if defined(__STDC_ISO_10646__) || defined(MB_LEN_MAX) || defined(USE_MULTIBYTE) || TRIO_WIDECHAR +# define TRIO_COMPILER_SUPPORTS_MULTIBYTE +# if !defined(MB_LEN_MAX) +# define MB_LEN_MAX 6 +# endif +#endif + +/************************************************************************* + * Generic definitions + */ + +#if !(defined(DEBUG) || defined(NDEBUG)) +# define NDEBUG +#endif +#include <assert.h> +#include <ctype.h> +#if !defined(TRIO_COMPILER_SUPPORTS_C99) +# define isblank(x) (((x)==32) || ((x)==9)) +#endif +#include <math.h> +#include <limits.h> +#include <float.h> +#if defined(TRIO_COMPILER_ANCIENT) +# include <varargs.h> +#else +# include <stdarg.h> +#endif +#include <stddef.h> +#include <errno.h> + +#ifndef NULL +# define NULL 0 +#endif +#define NIL ((char)0) +#ifndef FALSE +# define FALSE (1 == 0) +# define TRUE (! FALSE) +#endif +#define BOOLEAN_T int + +/* mincore() can be used for debugging purposes */ +#define VALID(x) (NULL != (x)) + +#if TRIO_ERRORS + /* + * Encode the error code and the position. This is decoded + * with TRIO_ERROR_CODE and TRIO_ERROR_POSITION. 
+ */ +# define TRIO_ERROR_RETURN(x,y) (- ((x) + ((y) << 8))) +#else +# define TRIO_ERROR_RETURN(x,y) (-1) +#endif + + +/************************************************************************* + * Platform specific definitions + */ +#if defined(TRIO_PLATFORM_UNIX) +# include <unistd.h> +# include <signal.h> +# include <locale.h> +# define USE_LOCALE +#endif /* TRIO_PLATFORM_UNIX */ +#if defined(TRIO_PLATFORM_VMS) +# include <unistd.h> +#endif +#if defined(TRIO_PLATFORM_WIN32) +# include <io.h> +# define read _read +# define write _write +#endif /* TRIO_PLATFORM_WIN32 */ + +#define TRIO_MSVC_VERSION_5 1100 + +#if TRIO_WIDECHAR +# if defined(TRIO_COMPILER_SUPPORTS_ISO94) +# include <wchar.h> +# include <wctype.h> +typedef wchar_t trio_wchar_t; +typedef wint_t trio_wint_t; +# else +typedef char trio_wchar_t; +typedef int trio_wint_t; +# define WCONST(x) L ## x +# define WEOF EOF +# define iswalnum(x) isalnum(x) +# define iswalpha(x) isalpha(x) +# define iswblank(x) isblank(x) +# define iswcntrl(x) iscntrl(x) +# define iswdigit(x) isdigit(x) +# define iswgraph(x) isgraph(x) +# define iswlower(x) islower(x) +# define iswprint(x) isprint(x) +# define iswpunct(x) ispunct(x) +# define iswspace(x) isspace(x) +# define iswupper(x) isupper(x) +# define iswxdigit(x) isxdigit(x) +# endif +#endif + + +/************************************************************************* + * Compiler dependent definitions + */ + +/* Support for long long */ +#ifndef __cplusplus +# if !defined(USE_LONGLONG) +# if defined(TRIO_COMPILER_GCC) && !defined(__STRICT_ANSI__) +# define USE_LONGLONG +# elif defined(TRIO_COMPILER_SUNPRO) +# define USE_LONGLONG +# elif defined(_LONG_LONG) || defined(_LONGLONG) +# define USE_LONGLONG +# endif +# endif +#endif + +/* The extra long numbers */ +#if defined(USE_LONGLONG) +typedef signed long long int trio_longlong_t; +typedef unsigned long long int trio_ulonglong_t; +#elif defined(TRIO_COMPILER_MSVC) +# if (_MSC_VER >= TRIO_MSVC_VERSION_5) +typedef signed 
__int64 trio_longlong_t; +typedef unsigned __int64 trio_ulonglong_t; +# else +typedef signed long int trio_longlong_t; +typedef unsigned long int trio_ulonglong_t; +# endif +#else +typedef TRIO_SIGNED long int trio_longlong_t; +typedef unsigned long int trio_ulonglong_t; +#endif + +/* Maximal and fixed integer types */ +#if defined(TRIO_COMPILER_SUPPORTS_C99) +# include <stdint.h> +typedef intmax_t trio_intmax_t; +typedef uintmax_t trio_uintmax_t; +typedef int8_t trio_int8_t; +typedef int16_t trio_int16_t; +typedef int32_t trio_int32_t; +typedef int64_t trio_int64_t; +#elif defined(TRIO_COMPILER_SUPPORTS_UNIX98) +# include <inttypes.h> +typedef intmax_t trio_intmax_t; +typedef uintmax_t trio_uintmax_t; +typedef int8_t trio_int8_t; +typedef int16_t trio_int16_t; +typedef int32_t trio_int32_t; +typedef int64_t trio_int64_t; +#elif defined(TRIO_COMPILER_MSVC) && (_MSC_VER >= TRIO_MSVC_VERSION_5) +typedef trio_longlong_t trio_intmax_t; +typedef trio_ulonglong_t trio_uintmax_t; +typedef __int8 trio_int8_t; +typedef __int16 trio_int16_t; +typedef __int32 trio_int32_t; +typedef __int64 trio_int64_t; +#else +typedef trio_longlong_t trio_intmax_t; +typedef trio_ulonglong_t trio_uintmax_t; +# if defined(TRIO_INT8_T) +typedef TRIO_INT8_T trio_int8_t; +# else +typedef TRIO_SIGNED char trio_int8_t; +# endif +# if defined(TRIO_INT16_T) +typedef TRIO_INT16_T trio_int16_t; +# else +typedef TRIO_SIGNED short trio_int16_t; +# endif +# if defined(TRIO_INT32_T) +typedef TRIO_INT32_T trio_int32_t; +# else +typedef TRIO_SIGNED int trio_int32_t; +# endif +# if defined(TRIO_INT64_T) +typedef TRIO_INT64_T trio_int64_t; +# else +typedef trio_longlong_t trio_int64_t; +# endif +#endif + +#if !(defined(TRIO_COMPILER_SUPPORTS_C99) \ + || defined(TRIO_COMPILER_SUPPORTS_UNIX01)) +# define floorl(x) floor((double)(x)) +# define fmodl(x,y) fmod((double)(x),(double)(y)) +# define powl(x,y) pow((double)(x),(double)(y)) +#endif + +#define TRIO_FABS(x) (((x) < 0.0) ? 
-(x) : (x)) + +/************************************************************************* + * Internal Definitions + */ + +#ifndef DECIMAL_DIG +# define DECIMAL_DIG DBL_DIG +#endif + +/* Long double sizes */ +#ifdef LDBL_DIG +# define MAX_MANTISSA_DIGITS LDBL_DIG +# define MAX_EXPONENT_DIGITS 4 +# define MAX_DOUBLE_DIGITS LDBL_MAX_10_EXP +#else +# define MAX_MANTISSA_DIGITS DECIMAL_DIG +# define MAX_EXPONENT_DIGITS 3 +# define MAX_DOUBLE_DIGITS DBL_MAX_10_EXP +#endif + +#if defined(TRIO_COMPILER_ANCIENT) || !defined(LDBL_DIG) +# undef LDBL_DIG +# undef LDBL_MANT_DIG +# undef LDBL_EPSILON +# define LDBL_DIG DBL_DIG +# define LDBL_MANT_DIG DBL_MANT_DIG +# define LDBL_EPSILON DBL_EPSILON +#endif + +/* The maximal number of digits is for base 2 */ +#define MAX_CHARS_IN(x) (sizeof(x) * CHAR_BIT) +/* The width of a pointer. The number of bits in a hex digit is 4 */ +#define POINTER_WIDTH ((sizeof("0x") - 1) + sizeof(trio_pointer_t) * CHAR_BIT / 4) + +/* Infinite and Not-A-Number for floating-point */ +#define INFINITE_LOWER "inf" +#define INFINITE_UPPER "INF" +#define LONG_INFINITE_LOWER "infinite" +#define LONG_INFINITE_UPPER "INFINITE" +#define NAN_LOWER "nan" +#define NAN_UPPER "NAN" + +/* Various constants */ +enum { + TYPE_PRINT = 1, + TYPE_SCAN = 2, + + /* Flags. 
Use maximum 32 */ + FLAGS_NEW = 0, + FLAGS_STICKY = 1, + FLAGS_SPACE = 2 * FLAGS_STICKY, + FLAGS_SHOWSIGN = 2 * FLAGS_SPACE, + FLAGS_LEFTADJUST = 2 * FLAGS_SHOWSIGN, + FLAGS_ALTERNATIVE = 2 * FLAGS_LEFTADJUST, + FLAGS_SHORT = 2 * FLAGS_ALTERNATIVE, + FLAGS_SHORTSHORT = 2 * FLAGS_SHORT, + FLAGS_LONG = 2 * FLAGS_SHORTSHORT, + FLAGS_QUAD = 2 * FLAGS_LONG, + FLAGS_LONGDOUBLE = 2 * FLAGS_QUAD, + FLAGS_SIZE_T = 2 * FLAGS_LONGDOUBLE, + FLAGS_PTRDIFF_T = 2 * FLAGS_SIZE_T, + FLAGS_INTMAX_T = 2 * FLAGS_PTRDIFF_T, + FLAGS_NILPADDING = 2 * FLAGS_INTMAX_T, + FLAGS_UNSIGNED = 2 * FLAGS_NILPADDING, + FLAGS_UPPER = 2 * FLAGS_UNSIGNED, + FLAGS_WIDTH = 2 * FLAGS_UPPER, + FLAGS_WIDTH_PARAMETER = 2 * FLAGS_WIDTH, + FLAGS_PRECISION = 2 * FLAGS_WIDTH_PARAMETER, + FLAGS_PRECISION_PARAMETER = 2 * FLAGS_PRECISION, + FLAGS_BASE = 2 * FLAGS_PRECISION_PARAMETER, + FLAGS_BASE_PARAMETER = 2 * FLAGS_BASE, + FLAGS_FLOAT_E = 2 * FLAGS_BASE_PARAMETER, + FLAGS_FLOAT_G = 2 * FLAGS_FLOAT_E, + FLAGS_QUOTE = 2 * FLAGS_FLOAT_G, + FLAGS_WIDECHAR = 2 * FLAGS_QUOTE, + FLAGS_ALLOC = 2 * FLAGS_WIDECHAR, + FLAGS_IGNORE = 2 * FLAGS_ALLOC, + FLAGS_IGNORE_PARAMETER = 2 * FLAGS_IGNORE, + FLAGS_VARSIZE_PARAMETER = 2 * FLAGS_IGNORE_PARAMETER, + FLAGS_FIXED_SIZE = 2 * FLAGS_VARSIZE_PARAMETER, + /* Reused flags */ + FLAGS_EXCLUDE = FLAGS_SHORT, + FLAGS_USER_DEFINED = FLAGS_IGNORE, + FLAGS_ROUNDING = FLAGS_INTMAX_T, + /* Compounded flags */ + FLAGS_ALL_VARSIZES = FLAGS_LONG | FLAGS_QUAD | FLAGS_INTMAX_T | FLAGS_PTRDIFF_T | FLAGS_SIZE_T, + FLAGS_ALL_SIZES = FLAGS_ALL_VARSIZES | FLAGS_SHORTSHORT | FLAGS_SHORT, + + NO_POSITION = -1, + NO_WIDTH = 0, + NO_PRECISION = -1, + NO_SIZE = -1, + + /* Do not change these */ + NO_BASE = -1, + MIN_BASE = 2, + MAX_BASE = 36, + BASE_BINARY = 2, + BASE_OCTAL = 8, + BASE_DECIMAL = 10, + BASE_HEX = 16, + + /* Maximal number of allowed parameters */ + MAX_PARAMETERS = 64, + /* Maximal number of characters in class */ + MAX_CHARACTER_CLASS = UCHAR_MAX + 1, + + /* Maximal string lengths for 
user-defined specifiers */ + MAX_USER_NAME = 64, + MAX_USER_DATA = 256, + + /* Maximal length of locale separator strings */ + MAX_LOCALE_SEPARATOR_LENGTH = MB_LEN_MAX, + /* Maximal number of integers in grouping */ + MAX_LOCALE_GROUPS = 64, + + /* Initial size of asprintf buffer */ + DYNAMIC_START_SIZE = 32 +}; + +#define NO_GROUPING ((int)CHAR_MAX) + +/* Fundamental formatting parameter types */ +#define FORMAT_UNKNOWN 0 +#define FORMAT_INT 1 +#define FORMAT_DOUBLE 2 +#define FORMAT_CHAR 3 +#define FORMAT_STRING 4 +#define FORMAT_POINTER 5 +#define FORMAT_COUNT 6 +#define FORMAT_PARAMETER 7 +#define FORMAT_GROUP 8 +#if TRIO_GNU +# define FORMAT_ERRNO 9 +#endif +#if TRIO_EXTENSION +# define FORMAT_USER_DEFINED 10 +#endif + +/* Character constants */ +#define CHAR_IDENTIFIER '%' +#define CHAR_BACKSLASH '\\' +#define CHAR_QUOTE '\"' +#define CHAR_ADJUST ' ' + +/* Character class expressions */ +#define CLASS_ALNUM "[:alnum:]" +#define CLASS_ALPHA "[:alpha:]" +#define CLASS_BLANK "[:blank:]" +#define CLASS_CNTRL "[:cntrl:]" +#define CLASS_DIGIT "[:digit:]" +#define CLASS_GRAPH "[:graph:]" +#define CLASS_LOWER "[:lower:]" +#define CLASS_PRINT "[:print:]" +#define CLASS_PUNCT "[:punct:]" +#define CLASS_SPACE "[:space:]" +#define CLASS_UPPER "[:upper:]" +#define CLASS_XDIGIT "[:xdigit:]" + +/* + * SPECIFIERS: + * + * + * a Hex-float + * A Hex-float + * c Character + * C Widechar character (wint_t) + * d Decimal + * e Float + * E Float + * f Float + * F Float + * g Float + * G Float + * i Integer + * m Error message + * n Count + * o Octal + * p Pointer + * s String + * S Widechar string (wchar_t *) + * u Unsigned + * x Hex + * X Hex + * [] Group + * <> User-defined + * + * Reserved: + * + * D Binary Coded Decimal %D(length,precision) (OS/390) + */ +#define SPECIFIER_CHAR 'c' +#define SPECIFIER_STRING 's' +#define SPECIFIER_DECIMAL 'd' +#define SPECIFIER_INTEGER 'i' +#define SPECIFIER_UNSIGNED 'u' +#define SPECIFIER_OCTAL 'o' +#define SPECIFIER_HEX 'x' +#define 
SPECIFIER_HEX_UPPER 'X' +#define SPECIFIER_FLOAT_E 'e' +#define SPECIFIER_FLOAT_E_UPPER 'E' +#define SPECIFIER_FLOAT_F 'f' +#define SPECIFIER_FLOAT_F_UPPER 'F' +#define SPECIFIER_FLOAT_G 'g' +#define SPECIFIER_FLOAT_G_UPPER 'G' +#define SPECIFIER_POINTER 'p' +#define SPECIFIER_GROUP '[' +#define SPECIFIER_UNGROUP ']' +#define SPECIFIER_COUNT 'n' +#if TRIO_UNIX98 +# define SPECIFIER_CHAR_UPPER 'C' +# define SPECIFIER_STRING_UPPER 'S' +#endif +#if TRIO_C99 +# define SPECIFIER_HEXFLOAT 'a' +# define SPECIFIER_HEXFLOAT_UPPER 'A' +#endif +#if TRIO_GNU +# define SPECIFIER_ERRNO 'm' +#endif +#if TRIO_EXTENSION +# define SPECIFIER_BINARY 'b' +# define SPECIFIER_BINARY_UPPER 'B' +# define SPECIFIER_USER_DEFINED_BEGIN '<' +# define SPECIFIER_USER_DEFINED_END '>' +# define SPECIFIER_USER_DEFINED_SEPARATOR ':' +#endif + +/* + * QUALIFIERS: + * + * + * Numbers = d,i,o,u,x,X + * Float = a,A,e,E,f,F,g,G + * String = s + * Char = c + * + * + * 9$ Position + * Use the 9th parameter. 9 can be any number between 1 and + * the maximal argument + * + * 9 Width + * Set width to 9. 9 can be any number, but must not be postfixed + * by '$' + * + * h Short + * Numbers: + * (unsigned) short int + * + * hh Short short + * Numbers: + * (unsigned) char + * + * l Long + * Numbers: + * (unsigned) long int + * String: + * as the S specifier + * Char: + * as the C specifier + * + * ll Long Long + * Numbers: + * (unsigned) long long int + * + * L Long Double + * Float + * long double + * + * # Alternative + * Float: + * Decimal-point is always present + * String: + * non-printable characters are handled as \number + * + * Spacing + * + * + Sign + * + * - Alignment + * + * . 
Precision + * + * * Parameter + * print: use parameter + * scan: no parameter (ignore) + * + * q Quad + * + * Z size_t + * + * w Widechar + * + * ' Thousands/quote + * Numbers: + * Integer part grouped in thousands + * Binary numbers: + * Number grouped in nibbles (4 bits) + * String: + * Quoted string + * + * j intmax_t + * t ptrdiff_t + * z size_t + * + * ! Sticky + * @ Parameter (for both print and scan) + * + * I n-bit Integer + * Numbers: + * The following options exist + * I8 = 8-bit integer + * I16 = 16-bit integer + * I32 = 32-bit integer + * I64 = 64-bit integer + */ +#define QUALIFIER_POSITION '$' +#define QUALIFIER_SHORT 'h' +#define QUALIFIER_LONG 'l' +#define QUALIFIER_LONG_UPPER 'L' +#define QUALIFIER_ALTERNATIVE '#' +#define QUALIFIER_SPACE ' ' +#define QUALIFIER_PLUS '+' +#define QUALIFIER_MINUS '-' +#define QUALIFIER_DOT '.' +#define QUALIFIER_STAR '*' +#define QUALIFIER_CIRCUMFLEX '^' /* For scanlists */ +#if TRIO_C99 +# define QUALIFIER_SIZE_T 'z' +# define QUALIFIER_PTRDIFF_T 't' +# define QUALIFIER_INTMAX_T 'j' +#endif +#if TRIO_BSD || TRIO_GNU +# define QUALIFIER_QUAD 'q' +#endif +#if TRIO_GNU +# define QUALIFIER_SIZE_T_UPPER 'Z' +#endif +#if TRIO_MISC +# define QUALIFIER_WIDECHAR 'w' +#endif +#if TRIO_MICROSOFT +# define QUALIFIER_FIXED_SIZE 'I' +#endif +#if TRIO_EXTENSION +# define QUALIFIER_QUOTE '\'' +# define QUALIFIER_STICKY '!' 
+# define QUALIFIER_VARSIZE '&' /* This should remain undocumented */ +# define QUALIFIER_PARAM '@' /* Experimental */ +# define QUALIFIER_COLON ':' /* For scanlists */ +# define QUALIFIER_EQUAL '=' /* For scanlists */ +# define QUALIFIER_ROUNDING_UPPER 'R' +#endif + + +/************************************************************************* + * + * Internal Structures + * + *************************************************************************/ + +/* Parameters */ +typedef struct { + /* An indication of which entry in the data union is used */ + int type; + /* The flags */ + unsigned long flags; + /* The width qualifier */ + int width; + /* The precision qualifier */ + int precision; + /* The base qualifier */ + int base; + /* The size for the variable size qualifier */ + int varsize; + /* The marker of the end of the specifier */ + int indexAfterSpecifier; + /* The data from the argument list */ + union { + char *string; +#if TRIO_WIDECHAR + trio_wchar_t *wstring; +#endif + trio_pointer_t pointer; + union { + trio_intmax_t as_signed; + trio_uintmax_t as_unsigned; + } number; + double doubleNumber; + double *doublePointer; + trio_long_double_t longdoubleNumber; + trio_long_double_t *longdoublePointer; + int errorNumber; + } data; + /* For the user-defined specifier */ + char user_name[MAX_USER_NAME]; + char user_data[MAX_USER_DATA]; +} trio_parameter_t; + +/* Container for customized functions */ +typedef struct { + union { + trio_outstream_t out; + trio_instream_t in; + } stream; + trio_pointer_t closure; +} trio_custom_t; + +/* General trio "class" */ +typedef struct _trio_class_t { + /* + * The function to write characters to a stream. + */ + void (*OutStream) TRIO_PROTO((struct _trio_class_t *, int)); + /* + * The function to read characters from a stream. + */ + void (*InStream) TRIO_PROTO((struct _trio_class_t *, int *)); + /* + * The current location in the stream. + */ + trio_pointer_t location; + /* + * The character currently being processed. 
+ */ + int current; + /* + * The number of characters that would have been written/read + * if there had been sufficient space. + */ + int processed; + /* + * The number of characters that are actually written/read. + * Processed and committed will only differ for the *nprintf + * and *nscanf functions. + */ + int committed; + /* + * The upper limit of characters that may be written/read. + */ + int max; + /* + * The last output error that was detected. + */ + int error; +} trio_class_t; + +/* References (for user-defined callbacks) */ +typedef struct _trio_reference_t { + trio_class_t *data; + trio_parameter_t *parameter; +} trio_reference_t; + +/* Registered entries (for user-defined callbacks) */ +typedef struct _trio_userdef_t { + struct _trio_userdef_t *next; + trio_callback_t callback; + char *name; +} trio_userdef_t; + + +/************************************************************************* + * + * Internal Variables + * + *************************************************************************/ + +static TRIO_CONST char rcsid[] = "@(#)$Id$"; + +/* + * Need this to workaround a parser bug in HP C/iX compiler that fails + * to resolves macro definitions that includes type 'long double', + * e.g: va_arg(arg_ptr, long double) + */ +#if defined(TRIO_PLATFORM_MPEIX) +static TRIO_CONST trio_long_double_t ___dummy_long_double = 0; +#endif + +static TRIO_CONST char internalNullString[] = "(nil)"; + +#if defined(USE_LOCALE) +static struct lconv *internalLocaleValues = NULL; +#endif + +/* + * UNIX98 says "in a locale where the radix character is not defined, + * the radix character defaults to a period (.)" + */ +static int internalDecimalPointLength = 1; +static int internalThousandSeparatorLength = 1; +static char internalDecimalPoint = '.'; +static char internalDecimalPointString[MAX_LOCALE_SEPARATOR_LENGTH + 1] = "."; +static char internalThousandSeparator[MAX_LOCALE_SEPARATOR_LENGTH + 1] = ","; +static char internalGrouping[MAX_LOCALE_GROUPS] = { 
(char)NO_GROUPING }; + +static TRIO_CONST char internalDigitsLower[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +static TRIO_CONST char internalDigitsUpper[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static BOOLEAN_T internalDigitsUnconverted = TRUE; +static int internalDigitArray[128]; +#if TRIO_EXTENSION +static BOOLEAN_T internalCollationUnconverted = TRUE; +static char internalCollationArray[MAX_CHARACTER_CLASS][MAX_CHARACTER_CLASS]; +#endif + +#if TRIO_EXTENSION +static TRIO_VOLATILE trio_callback_t internalEnterCriticalRegion = NULL; +static TRIO_VOLATILE trio_callback_t internalLeaveCriticalRegion = NULL; +static trio_userdef_t *internalUserDef = NULL; +#endif + + +/************************************************************************* + * + * Internal Functions + * + ************************************************************************/ + +#if defined(TRIO_MINIMAL) +# define TRIO_STRING_PUBLIC static +# include "triostr.c" +#endif /* defined(TRIO_MINIMAL) */ + +/************************************************************************* + * TrioIsQualifier + * + * Description: + * Remember to add all new qualifiers to this function. + * QUALIFIER_POSITION must not be added. 
+ */ +TRIO_PRIVATE BOOLEAN_T +TrioIsQualifier +TRIO_ARGS1((character), + TRIO_CONST char character) +{ + /* QUALIFIER_POSITION is not included */ + switch (character) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case QUALIFIER_PLUS: + case QUALIFIER_MINUS: + case QUALIFIER_SPACE: + case QUALIFIER_DOT: + case QUALIFIER_STAR: + case QUALIFIER_ALTERNATIVE: + case QUALIFIER_SHORT: + case QUALIFIER_LONG: + case QUALIFIER_LONG_UPPER: + case QUALIFIER_CIRCUMFLEX: +#if defined(QUALIFIER_SIZE_T) + case QUALIFIER_SIZE_T: +#endif +#if defined(QUALIFIER_PTRDIFF_T) + case QUALIFIER_PTRDIFF_T: +#endif +#if defined(QUALIFIER_INTMAX_T) + case QUALIFIER_INTMAX_T: +#endif +#if defined(QUALIFIER_QUAD) + case QUALIFIER_QUAD: +#endif +#if defined(QUALIFIER_SIZE_T_UPPER) + case QUALIFIER_SIZE_T_UPPER: +#endif +#if defined(QUALIFIER_WIDECHAR) + case QUALIFIER_WIDECHAR: +#endif +#if defined(QUALIFIER_QUOTE) + case QUALIFIER_QUOTE: +#endif +#if defined(QUALIFIER_STICKY) + case QUALIFIER_STICKY: +#endif +#if defined(QUALIFIER_VARSIZE) + case QUALIFIER_VARSIZE: +#endif +#if defined(QUALIFIER_PARAM) + case QUALIFIER_PARAM: +#endif +#if defined(QUALIFIER_FIXED_SIZE) + case QUALIFIER_FIXED_SIZE: +#endif +#if defined(QUALIFIER_ROUNDING_UPPER) + case QUALIFIER_ROUNDING_UPPER: +#endif + return TRUE; + default: + return FALSE; + } +} + +/************************************************************************* + * TrioSetLocale + */ +#if defined(USE_LOCALE) +TRIO_PRIVATE void +TrioSetLocale(TRIO_NOARGS) +{ + internalLocaleValues = (struct lconv *)localeconv(); + if (internalLocaleValues) + { + if ((internalLocaleValues->decimal_point) && + (internalLocaleValues->decimal_point[0] != NIL)) + { + internalDecimalPointLength = trio_length(internalLocaleValues->decimal_point); + if (internalDecimalPointLength == 1) + { + internalDecimalPoint = internalLocaleValues->decimal_point[0]; + } + else + { + internalDecimalPoint = NIL; + 
trio_copy_max(internalDecimalPointString, + sizeof(internalDecimalPointString), + internalLocaleValues->decimal_point); + } + } + if ((internalLocaleValues->thousands_sep) && + (internalLocaleValues->thousands_sep[0] != NIL)) + { + trio_copy_max(internalThousandSeparator, + sizeof(internalThousandSeparator), + internalLocaleValues->thousands_sep); + internalThousandSeparatorLength = trio_length(internalThousandSeparator); + } + if ((internalLocaleValues->grouping) && + (internalLocaleValues->grouping[0] != NIL)) + { + trio_copy_max(internalGrouping, + sizeof(internalGrouping), + internalLocaleValues->grouping); + } + } +} +#endif /* defined(USE_LOCALE) */ + +TRIO_PRIVATE int +TrioCalcThousandSeparatorLength +TRIO_ARGS1((digits), + int digits) +{ +#if TRIO_EXTENSION + int count = 0; + int step = NO_GROUPING; + char *groupingPointer = internalGrouping; + + while (digits > 0) + { + if (*groupingPointer == CHAR_MAX) + { + /* Disable grouping */ + break; /* while */ + } + else if (*groupingPointer == 0) + { + /* Repeat last group */ + if (step == NO_GROUPING) + { + /* Error in locale */ + break; /* while */ + } + } + else + { + step = *groupingPointer++; + } + if (digits > step) + count += internalThousandSeparatorLength; + digits -= step; + } + return count; +#else + return 0; +#endif +} + +TRIO_PRIVATE BOOLEAN_T +TrioFollowedBySeparator +TRIO_ARGS1((position), + int position) +{ +#if TRIO_EXTENSION + int step = 0; + char *groupingPointer = internalGrouping; + + position--; + if (position == 0) + return FALSE; + while (position > 0) + { + if (*groupingPointer == CHAR_MAX) + { + /* Disable grouping */ + break; /* while */ + } + else if (*groupingPointer != 0) + { + step = *groupingPointer++; + } + if (step == 0) + break; + position -= step; + } + return (position == 0); +#else + return FALSE; +#endif +} + +/************************************************************************* + * TrioGetPosition + * + * Get the %n$ position. 
+ */ +TRIO_PRIVATE int +TrioGetPosition +TRIO_ARGS2((format, indexPointer), + TRIO_CONST char *format, + int *indexPointer) +{ +#if TRIO_UNIX98 + char *tmpformat; + int number = 0; + int index = *indexPointer; + + number = (int)trio_to_long(&format[index], &tmpformat, BASE_DECIMAL); + index = (int)(tmpformat - format); + if ((number != 0) && (QUALIFIER_POSITION == format[index++])) + { + *indexPointer = index; + /* + * number is decreased by 1, because n$ starts from 1, whereas + * the array it is indexing starts from 0. + */ + return number - 1; + } +#endif + return NO_POSITION; +} + +#if TRIO_EXTENSION +/************************************************************************* + * TrioFindNamespace + * + * Find registered user-defined specifier. + * The prev argument is used for optimization only. + */ +TRIO_PRIVATE trio_userdef_t * +TrioFindNamespace +TRIO_ARGS2((name, prev), + TRIO_CONST char *name, + trio_userdef_t **prev) +{ + trio_userdef_t *def; + + if (internalEnterCriticalRegion) + (void)internalEnterCriticalRegion(NULL); + + for (def = internalUserDef; def; def = def->next) + { + /* Case-sensitive string comparison */ + if (trio_equal_case(def->name, name)) + break; + + if (prev) + *prev = def; + } + + if (internalLeaveCriticalRegion) + (void)internalLeaveCriticalRegion(NULL); + + return def; +} +#endif + +/************************************************************************* + * TrioPower + * + * Description: + * Calculate pow(base, exponent), where number and exponent are integers. 
+ */ +TRIO_PRIVATE trio_long_double_t +TrioPower +TRIO_ARGS2((number, exponent), + int number, + int exponent) +{ + trio_long_double_t result; + + if (number == 10) + { + switch (exponent) + { + /* Speed up calculation of common cases */ + case 0: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E-1); + break; + case 1: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+0); + break; + case 2: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+1); + break; + case 3: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+2); + break; + case 4: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+3); + break; + case 5: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+4); + break; + case 6: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+5); + break; + case 7: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+6); + break; + case 8: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+7); + break; + case 9: + result = (trio_long_double_t)number * TRIO_SUFFIX_LONG(1E+8); + break; + default: + result = powl((trio_long_double_t)number, + (trio_long_double_t)exponent); + break; + } + } + else + { + return powl((trio_long_double_t)number, (trio_long_double_t)exponent); + } + return result; +} + +/************************************************************************* + * TrioLogarithm + */ +TRIO_PRIVATE double +TrioLogarithm +TRIO_ARGS2((number, base), + double number, + int base) +{ + double result; + + if (number <= 0.0) + { + /* xlC crashes on log(0) */ + result = (number == 0.0) ? 
trio_ninf() : trio_nan(); + } + else + { + if (base == 10) + { + result = log10(number); + } + else + { + result = log10(number) / log10((double)base); + } + } + return result; +} + +/************************************************************************* + * TrioLogarithmBase + */ +TRIO_PRIVATE double +TrioLogarithmBase +TRIO_ARGS1((base), + int base) +{ + switch (base) + { + case BASE_BINARY : return 1.0; + case BASE_OCTAL : return 3.0; + case BASE_DECIMAL: return 3.321928094887362345; + case BASE_HEX : return 4.0; + default : return TrioLogarithm((double)base, 2); + } +} + +/************************************************************************* + * TrioParse + * + * Description: + * Parse the format string + */ +TRIO_PRIVATE int +TrioParse +TRIO_ARGS5((type, format, parameters, arglist, argarray), + int type, + TRIO_CONST char *format, + trio_parameter_t *parameters, + va_list *arglist, + trio_pointer_t *argarray) +{ + /* Count the number of times a parameter is referenced */ + unsigned short usedEntries[MAX_PARAMETERS]; + /* Parameter counters */ + int parameterPosition; + int currentParam; + int maxParam = -1; + /* Utility variables */ + unsigned long flags; + int width; + int precision; + int varsize; + int base; + int index; /* Index into formatting string */ + int dots; /* Count number of dots in modifier part */ + BOOLEAN_T positional; /* Does the specifier have a positional? */ + BOOLEAN_T gotSticky = FALSE; /* Are there any sticky modifiers at all? 
*/ + /* + * indices specifies the order in which the parameters must be + * read from the va_args (this is necessary to handle positionals) + */ + int indices[MAX_PARAMETERS]; + int pos = 0; + /* Various variables */ + char ch; +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + int charlen; +#endif + int save_errno; + int i = -1; + int num; + char *tmpformat; + + /* One and only one of arglist and argarray must be used */ + assert((arglist != NULL) ^ (argarray != NULL)); + + /* + * The 'parameters' array is not initialized, but we need to + * know which entries we have used. + */ + memset(usedEntries, 0, sizeof(usedEntries)); + + save_errno = errno; + index = 0; + parameterPosition = 0; +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + (void)mblen(NULL, 0); +#endif + + while (format[index]) + { +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + if (! isascii(format[index])) + { + /* + * Multibyte characters cannot be legal specifiers or + * modifiers, so we skip over them. + */ + charlen = mblen(&format[index], MB_LEN_MAX); + index += (charlen > 0) ? 
charlen : 1; + continue; /* while */ + } +#endif /* TRIO_COMPILER_SUPPORTS_MULTIBYTE */ + if (CHAR_IDENTIFIER == format[index++]) + { + if (CHAR_IDENTIFIER == format[index]) + { + index++; + continue; /* while */ + } + + flags = FLAGS_NEW; + dots = 0; + currentParam = TrioGetPosition(format, &index); + positional = (NO_POSITION != currentParam); + if (!positional) + { + /* We have no positional, get the next counter */ + currentParam = parameterPosition; + } + if(currentParam >= MAX_PARAMETERS) + { + /* Bail out completely to make the error more obvious */ + return TRIO_ERROR_RETURN(TRIO_ETOOMANY, index); + } + + if (currentParam > maxParam) + maxParam = currentParam; + + /* Default values */ + width = NO_WIDTH; + precision = NO_PRECISION; + base = NO_BASE; + varsize = NO_SIZE; + + while (TrioIsQualifier(format[index])) + { + ch = format[index++]; + + switch (ch) + { + case QUALIFIER_SPACE: + flags |= FLAGS_SPACE; + break; + + case QUALIFIER_PLUS: + flags |= FLAGS_SHOWSIGN; + break; + + case QUALIFIER_MINUS: + flags |= FLAGS_LEFTADJUST; + flags &= ~FLAGS_NILPADDING; + break; + + case QUALIFIER_ALTERNATIVE: + flags |= FLAGS_ALTERNATIVE; + break; + + case QUALIFIER_DOT: + if (dots == 0) /* Precision */ + { + dots++; + + /* Skip if no precision */ + if (QUALIFIER_DOT == format[index]) + break; + + /* After the first dot we have the precision */ + flags |= FLAGS_PRECISION; + if ((QUALIFIER_STAR == format[index]) +#if defined(QUALIFIER_PARAM) + || (QUALIFIER_PARAM == format[index]) +#endif + ) + { + index++; + flags |= FLAGS_PRECISION_PARAMETER; + + precision = TrioGetPosition(format, &index); + if (precision == NO_POSITION) + { + parameterPosition++; + if (positional) + precision = parameterPosition; + else + { + precision = currentParam; + currentParam = precision + 1; + } + } + else + { + if (! 
positional) + currentParam = precision + 1; + if (width > maxParam) + maxParam = precision; + } + if (currentParam > maxParam) + maxParam = currentParam; + } + else + { + precision = trio_to_long(&format[index], + &tmpformat, + BASE_DECIMAL); + index = (int)(tmpformat - format); + } + } + else if (dots == 1) /* Base */ + { + dots++; + + /* After the second dot we have the base */ + flags |= FLAGS_BASE; + if ((QUALIFIER_STAR == format[index]) +#if defined(QUALIFIER_PARAM) + || (QUALIFIER_PARAM == format[index]) +#endif + ) + { + index++; + flags |= FLAGS_BASE_PARAMETER; + base = TrioGetPosition(format, &index); + if (base == NO_POSITION) + { + parameterPosition++; + if (positional) + base = parameterPosition; + else + { + base = currentParam; + currentParam = base + 1; + } + } + else + { + if (! positional) + currentParam = base + 1; + if (base > maxParam) + maxParam = base; + } + if (currentParam > maxParam) + maxParam = currentParam; + } + else + { + base = trio_to_long(&format[index], + &tmpformat, + BASE_DECIMAL); + if (base > MAX_BASE) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + index = (int)(tmpformat - format); + } + } + else + { + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + break; /* QUALIFIER_DOT */ + +#if defined(QUALIFIER_PARAM) + case QUALIFIER_PARAM: + type = TYPE_PRINT; + /* FALLTHROUGH */ +#endif + case QUALIFIER_STAR: + /* This has different meanings for print and scan */ + if (TYPE_PRINT == type) + { + /* Read with from parameter */ + flags |= (FLAGS_WIDTH | FLAGS_WIDTH_PARAMETER); + width = TrioGetPosition(format, &index); + if (width == NO_POSITION) + { + parameterPosition++; + if (positional) + width = parameterPosition; + else + { + width = currentParam; + currentParam = width + 1; + } + } + else + { + if (! 
positional) + currentParam = width + 1; + if (width > maxParam) + maxParam = width; + } + if (currentParam > maxParam) + maxParam = currentParam; + } + else + { + /* Scan, but do not store result */ + flags |= FLAGS_IGNORE; + } + + break; /* QUALIFIER_STAR */ + + case '0': + if (! (flags & FLAGS_LEFTADJUST)) + flags |= FLAGS_NILPADDING; + /* FALLTHROUGH */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + flags |= FLAGS_WIDTH; + /* &format[index - 1] is used to "rewind" the read + * character from format + */ + width = trio_to_long(&format[index - 1], + &tmpformat, + BASE_DECIMAL); + index = (int)(tmpformat - format); + break; + + case QUALIFIER_SHORT: + if (flags & FLAGS_SHORTSHORT) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + else if (flags & FLAGS_SHORT) + flags |= FLAGS_SHORTSHORT; + else + flags |= FLAGS_SHORT; + break; + + case QUALIFIER_LONG: + if (flags & FLAGS_QUAD) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + else if (flags & FLAGS_LONG) + flags |= FLAGS_QUAD; + else + flags |= FLAGS_LONG; + break; + + case QUALIFIER_LONG_UPPER: + flags |= FLAGS_LONGDOUBLE; + break; + +#if defined(QUALIFIER_SIZE_T) + case QUALIFIER_SIZE_T: + flags |= FLAGS_SIZE_T; + /* Modify flags for later truncation of number */ + if (sizeof(size_t) == sizeof(trio_ulonglong_t)) + flags |= FLAGS_QUAD; + else if (sizeof(size_t) == sizeof(long)) + flags |= FLAGS_LONG; + break; +#endif + +#if defined(QUALIFIER_PTRDIFF_T) + case QUALIFIER_PTRDIFF_T: + flags |= FLAGS_PTRDIFF_T; + if (sizeof(ptrdiff_t) == sizeof(trio_ulonglong_t)) + flags |= FLAGS_QUAD; + else if (sizeof(ptrdiff_t) == sizeof(long)) + flags |= FLAGS_LONG; + break; +#endif + +#if defined(QUALIFIER_INTMAX_T) + case QUALIFIER_INTMAX_T: + flags |= FLAGS_INTMAX_T; + if (sizeof(trio_intmax_t) == sizeof(trio_ulonglong_t)) + flags |= FLAGS_QUAD; + else if (sizeof(trio_intmax_t) == sizeof(long)) + flags |= FLAGS_LONG; + break; +#endif + +#if defined(QUALIFIER_QUAD) + case 
QUALIFIER_QUAD: + flags |= FLAGS_QUAD; + break; +#endif + +#if defined(QUALIFIER_FIXED_SIZE) + case QUALIFIER_FIXED_SIZE: + if (flags & FLAGS_FIXED_SIZE) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + + if (flags & (FLAGS_ALL_SIZES | FLAGS_LONGDOUBLE | + FLAGS_WIDECHAR | FLAGS_VARSIZE_PARAMETER)) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + + if ((format[index] == '6') && + (format[index + 1] == '4')) + { + varsize = sizeof(trio_int64_t); + index += 2; + } + else if ((format[index] == '3') && + (format[index + 1] == '2')) + { + varsize = sizeof(trio_int32_t); + index += 2; + } + else if ((format[index] == '1') && + (format[index + 1] == '6')) + { + varsize = sizeof(trio_int16_t); + index += 2; + } + else if (format[index] == '8') + { + varsize = sizeof(trio_int8_t); + index++; + } + else + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + + flags |= FLAGS_FIXED_SIZE; + break; +#endif + +#if defined(QUALIFIER_WIDECHAR) + case QUALIFIER_WIDECHAR: + flags |= FLAGS_WIDECHAR; + break; +#endif + +#if defined(QUALIFIER_SIZE_T_UPPER) + case QUALIFIER_SIZE_T_UPPER: + break; +#endif + +#if defined(QUALIFIER_QUOTE) + case QUALIFIER_QUOTE: + flags |= FLAGS_QUOTE; + break; +#endif + +#if defined(QUALIFIER_STICKY) + case QUALIFIER_STICKY: + flags |= FLAGS_STICKY; + gotSticky = TRUE; + break; +#endif + +#if defined(QUALIFIER_VARSIZE) + case QUALIFIER_VARSIZE: + flags |= FLAGS_VARSIZE_PARAMETER; + parameterPosition++; + if (positional) + varsize = parameterPosition; + else + { + varsize = currentParam; + currentParam = varsize + 1; + } + if (currentParam > maxParam) + maxParam = currentParam; + break; +#endif + +#if defined(QUALIFIER_ROUNDING_UPPER) + case QUALIFIER_ROUNDING_UPPER: + flags |= FLAGS_ROUNDING; + break; +#endif + + default: + /* Bail out completely to make the error more obvious */ + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + } /* while qualifier */ + + /* + * Parameters only need the type and value. The value is + * read later. 
+ */ + if (flags & FLAGS_WIDTH_PARAMETER) + { + usedEntries[width] += 1; + parameters[pos].type = FORMAT_PARAMETER; + parameters[pos].flags = 0; + indices[width] = pos; + width = pos++; + } + if (flags & FLAGS_PRECISION_PARAMETER) + { + usedEntries[precision] += 1; + parameters[pos].type = FORMAT_PARAMETER; + parameters[pos].flags = 0; + indices[precision] = pos; + precision = pos++; + } + if (flags & FLAGS_BASE_PARAMETER) + { + usedEntries[base] += 1; + parameters[pos].type = FORMAT_PARAMETER; + parameters[pos].flags = 0; + indices[base] = pos; + base = pos++; + } + if (flags & FLAGS_VARSIZE_PARAMETER) + { + usedEntries[varsize] += 1; + parameters[pos].type = FORMAT_PARAMETER; + parameters[pos].flags = 0; + indices[varsize] = pos; + varsize = pos++; + } + + indices[currentParam] = pos; + + switch (format[index++]) + { +#if defined(SPECIFIER_CHAR_UPPER) + case SPECIFIER_CHAR_UPPER: + flags |= FLAGS_WIDECHAR; + /* FALLTHROUGH */ +#endif + case SPECIFIER_CHAR: + if (flags & FLAGS_LONG) + flags |= FLAGS_WIDECHAR; + else if (flags & FLAGS_SHORT) + flags &= ~FLAGS_WIDECHAR; + parameters[pos].type = FORMAT_CHAR; + break; + +#if defined(SPECIFIER_STRING_UPPER) + case SPECIFIER_STRING_UPPER: + flags |= FLAGS_WIDECHAR; + /* FALLTHROUGH */ +#endif + case SPECIFIER_STRING: + if (flags & FLAGS_LONG) + flags |= FLAGS_WIDECHAR; + else if (flags & FLAGS_SHORT) + flags &= ~FLAGS_WIDECHAR; + parameters[pos].type = FORMAT_STRING; + break; + + case SPECIFIER_GROUP: + if (TYPE_SCAN == type) + { + int depth = 1; + parameters[pos].type = FORMAT_GROUP; + if (format[index] == QUALIFIER_CIRCUMFLEX) + index++; + if (format[index] == SPECIFIER_UNGROUP) + index++; + if (format[index] == QUALIFIER_MINUS) + index++; + /* Skip nested brackets */ + while (format[index] != NIL) + { + if (format[index] == SPECIFIER_GROUP) + { + depth++; + } + else if (format[index] == SPECIFIER_UNGROUP) + { + if (--depth <= 0) + { + index++; + break; + } + } + index++; + } + } + break; + + case SPECIFIER_INTEGER: + 
parameters[pos].type = FORMAT_INT; + break; + + case SPECIFIER_UNSIGNED: + flags |= FLAGS_UNSIGNED; + parameters[pos].type = FORMAT_INT; + break; + + case SPECIFIER_DECIMAL: + /* Disable base modifier */ + flags &= ~FLAGS_BASE_PARAMETER; + base = BASE_DECIMAL; + parameters[pos].type = FORMAT_INT; + break; + + case SPECIFIER_OCTAL: + flags &= ~FLAGS_BASE_PARAMETER; + base = BASE_OCTAL; + parameters[pos].type = FORMAT_INT; + break; + +#if defined(SPECIFIER_BINARY) + case SPECIFIER_BINARY_UPPER: + flags |= FLAGS_UPPER; + /* FALLTHROUGH */ + case SPECIFIER_BINARY: + flags |= FLAGS_NILPADDING; + flags &= ~FLAGS_BASE_PARAMETER; + base = BASE_BINARY; + parameters[pos].type = FORMAT_INT; + break; +#endif + + case SPECIFIER_HEX_UPPER: + flags |= FLAGS_UPPER; + /* FALLTHROUGH */ + case SPECIFIER_HEX: + flags |= FLAGS_UNSIGNED; + flags &= ~FLAGS_BASE_PARAMETER; + base = BASE_HEX; + parameters[pos].type = FORMAT_INT; + break; + + case SPECIFIER_FLOAT_E_UPPER: + flags |= FLAGS_UPPER; + /* FALLTHROUGH */ + case SPECIFIER_FLOAT_E: + flags |= FLAGS_FLOAT_E; + parameters[pos].type = FORMAT_DOUBLE; + break; + + case SPECIFIER_FLOAT_G_UPPER: + flags |= FLAGS_UPPER; + /* FALLTHROUGH */ + case SPECIFIER_FLOAT_G: + flags |= FLAGS_FLOAT_G; + parameters[pos].type = FORMAT_DOUBLE; + break; + + case SPECIFIER_FLOAT_F_UPPER: + flags |= FLAGS_UPPER; + /* FALLTHROUGH */ + case SPECIFIER_FLOAT_F: + parameters[pos].type = FORMAT_DOUBLE; + break; + + case SPECIFIER_POINTER: + if (sizeof(trio_pointer_t) == sizeof(trio_ulonglong_t)) + flags |= FLAGS_QUAD; + else if (sizeof(trio_pointer_t) == sizeof(long)) + flags |= FLAGS_LONG; + parameters[pos].type = FORMAT_POINTER; + break; + + case SPECIFIER_COUNT: + parameters[pos].type = FORMAT_COUNT; + break; + +#if defined(SPECIFIER_HEXFLOAT) +# if defined(SPECIFIER_HEXFLOAT_UPPER) + case SPECIFIER_HEXFLOAT_UPPER: + flags |= FLAGS_UPPER; + /* FALLTHROUGH */ +# endif + case SPECIFIER_HEXFLOAT: + base = BASE_HEX; + parameters[pos].type = FORMAT_DOUBLE; + 
break; +#endif + +#if defined(FORMAT_ERRNO) + case SPECIFIER_ERRNO: + parameters[pos].type = FORMAT_ERRNO; + break; +#endif + +#if defined(SPECIFIER_USER_DEFINED_BEGIN) + case SPECIFIER_USER_DEFINED_BEGIN: + { + unsigned int max; + int without_namespace = TRUE; + + parameters[pos].type = FORMAT_USER_DEFINED; + parameters[pos].user_name[0] = NIL; + tmpformat = (char *)&format[index]; + + while ((ch = format[index])) + { + index++; + if (ch == SPECIFIER_USER_DEFINED_END) + { + if (without_namespace) + { + /* We must get the handle first */ + parameters[pos].type = FORMAT_PARAMETER; + parameters[pos].indexAfterSpecifier = index; + parameters[pos].flags = FLAGS_USER_DEFINED; + /* Adjust parameters for insertion of new one */ + pos++; + usedEntries[currentParam] += 1; + parameters[pos].type = FORMAT_USER_DEFINED; + currentParam++; + indices[currentParam] = pos; + if (currentParam > maxParam) + maxParam = currentParam; + } + /* Copy the user data */ + max = (unsigned int)(&format[index] - tmpformat); + if (max > MAX_USER_DATA) + max = MAX_USER_DATA; + trio_copy_max(parameters[pos].user_data, + max, + tmpformat); + break; /* while */ + } + if (ch == SPECIFIER_USER_DEFINED_SEPARATOR) + { + without_namespace = FALSE; + /* Copy the namespace for later looking-up */ + max = (int)(&format[index] - tmpformat); + if (max > MAX_USER_NAME) + max = MAX_USER_NAME; + trio_copy_max(parameters[pos].user_name, + max, + tmpformat); + tmpformat = (char *)&format[index]; + } + } + if (ch != SPECIFIER_USER_DEFINED_END) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + break; +#endif /* defined(SPECIFIER_USER_DEFINED_BEGIN) */ + + default: + /* Bail out completely to make the error more obvious */ + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + + /* Count the number of times this entry has been used */ + usedEntries[currentParam] += 1; + + /* Find last sticky parameters */ + if (gotSticky && !(flags & FLAGS_STICKY)) + { + for (i = pos - 1; i >= 0; i--) + { + if (parameters[i].type 
== FORMAT_PARAMETER) + continue; + if ((parameters[i].flags & FLAGS_STICKY) && + (parameters[i].type == parameters[pos].type)) + { + /* Do not overwrite current qualifiers */ + flags |= (parameters[i].flags & (unsigned long)~FLAGS_STICKY); + if (width == NO_WIDTH) + width = parameters[i].width; + if (precision == NO_PRECISION) + precision = parameters[i].precision; + if (base == NO_BASE) + base = parameters[i].base; + break; + } + } + } + + parameters[pos].indexAfterSpecifier = index; + parameters[pos].flags = flags; + parameters[pos].width = width; + parameters[pos].precision = precision; + parameters[pos].base = (base == NO_BASE) ? BASE_DECIMAL : base; + parameters[pos].varsize = varsize; + pos++; + + if (! positional) + parameterPosition++; + + } /* if identifier */ + + } /* while format characters left */ + + for (num = 0; num <= maxParam; num++) + { + if (usedEntries[num] != 1) + { + if (usedEntries[num] == 0) /* gap detected */ + return TRIO_ERROR_RETURN(TRIO_EGAP, num); + else /* double references detected */ + return TRIO_ERROR_RETURN(TRIO_EDBLREF, num); + } + + i = indices[num]; + + /* + * FORMAT_PARAMETERS are only present if they must be read, + * so it makes no sense to check the ignore flag (besides, + * the flags variable is not set for that particular type) + */ + if ((parameters[i].type != FORMAT_PARAMETER) && + (parameters[i].flags & FLAGS_IGNORE)) + continue; /* for all arguments */ + + /* + * The stack arguments are read according to ANSI C89 + * default argument promotions: + * + * char = int + * short = int + * unsigned char = unsigned int + * unsigned short = unsigned int + * float = double + * + * In addition to the ANSI C89 these types are read (the + * default argument promotions of C99 has not been + * considered yet) + * + * long long + * long double + * size_t + * ptrdiff_t + * intmax_t + */ + switch (parameters[i].type) + { + case FORMAT_GROUP: + case FORMAT_STRING: +#if TRIO_WIDECHAR + if (flags & FLAGS_WIDECHAR) + { + 
parameters[i].data.wstring = (argarray == NULL) + ? va_arg(*arglist, trio_wchar_t *) + : (trio_wchar_t *)(argarray[num]); + } + else +#endif + { + parameters[i].data.string = (argarray == NULL) + ? va_arg(*arglist, char *) + : (char *)(argarray[num]); + } + break; + +#if defined(FORMAT_USER_DEFINED) + case FORMAT_USER_DEFINED: +#endif + case FORMAT_POINTER: + case FORMAT_COUNT: + case FORMAT_UNKNOWN: + parameters[i].data.pointer = (argarray == NULL) + ? va_arg(*arglist, trio_pointer_t ) + : argarray[num]; + break; + + case FORMAT_CHAR: + case FORMAT_INT: + if (TYPE_SCAN == type) + { + if (argarray == NULL) + parameters[i].data.pointer = + (trio_pointer_t)va_arg(*arglist, trio_pointer_t); + else + { + if (parameters[i].type == FORMAT_CHAR) + parameters[i].data.pointer = + (trio_pointer_t)((char *)argarray[num]); + else if (parameters[i].flags & FLAGS_SHORT) + parameters[i].data.pointer = + (trio_pointer_t)((short *)argarray[num]); + else + parameters[i].data.pointer = + (trio_pointer_t)((int *)argarray[num]); + } + } + else + { +#if defined(QUALIFIER_VARSIZE) || defined(QUALIFIER_FIXED_SIZE) + if (parameters[i].flags + & (FLAGS_VARSIZE_PARAMETER | FLAGS_FIXED_SIZE)) + { + if (parameters[i].flags & FLAGS_VARSIZE_PARAMETER) + { + /* + * Variable sizes are mapped onto the fixed sizes, in + * accordance with integer promotion. + * + * Please note that this may not be portable, as we + * only guess the size, not the layout of the numbers. + * For example, if int is little-endian, and long is + * big-endian, then this will fail. 
+ */ + varsize = (int)parameters[parameters[i].varsize].data.number.as_unsigned; + } + else + { + /* Used for the I<bits> modifiers */ + varsize = parameters[i].varsize; + } + parameters[i].flags &= ~FLAGS_ALL_VARSIZES; + + if (varsize <= (int)sizeof(int)) + ; + else if (varsize <= (int)sizeof(long)) + parameters[i].flags |= FLAGS_LONG; +#if defined(QUALIFIER_INTMAX_T) + else if (varsize <= (int)sizeof(trio_longlong_t)) + parameters[i].flags |= FLAGS_QUAD; + else + parameters[i].flags |= FLAGS_INTMAX_T; +#else + else + parameters[i].flags |= FLAGS_QUAD; +#endif + } +#endif /* defined(QUALIFIER_VARSIZE) */ +#if defined(QUALIFIER_SIZE_T) || defined(QUALIFIER_SIZE_T_UPPER) + if (parameters[i].flags & FLAGS_SIZE_T) + parameters[i].data.number.as_unsigned = (argarray == NULL) + ? (trio_uintmax_t)va_arg(*arglist, size_t) + : (trio_uintmax_t)(*((size_t *)argarray[num])); + else +#endif +#if defined(QUALIFIER_PTRDIFF_T) + if (parameters[i].flags & FLAGS_PTRDIFF_T) + parameters[i].data.number.as_unsigned = (argarray == NULL) + ? (trio_uintmax_t)va_arg(*arglist, ptrdiff_t) + : (trio_uintmax_t)(*((ptrdiff_t *)argarray[num])); + else +#endif +#if defined(QUALIFIER_INTMAX_T) + if (parameters[i].flags & FLAGS_INTMAX_T) + parameters[i].data.number.as_unsigned = (argarray == NULL) + ? (trio_uintmax_t)va_arg(*arglist, trio_intmax_t) + : (trio_uintmax_t)(*((trio_intmax_t *)argarray[num])); + else +#endif + if (parameters[i].flags & FLAGS_QUAD) + parameters[i].data.number.as_unsigned = (argarray == NULL) + ? (trio_uintmax_t)va_arg(*arglist, trio_ulonglong_t) + : (trio_uintmax_t)(*((trio_ulonglong_t *)argarray[num])); + else if (parameters[i].flags & FLAGS_LONG) + parameters[i].data.number.as_unsigned = (argarray == NULL) + ? 
(trio_uintmax_t)va_arg(*arglist, long) + : (trio_uintmax_t)(*((long *)argarray[num])); + else + { + if (argarray == NULL) + parameters[i].data.number.as_unsigned = (trio_uintmax_t)va_arg(*arglist, int); + else + { + if (parameters[i].type == FORMAT_CHAR) + parameters[i].data.number.as_unsigned = (trio_uintmax_t)(*((char *)argarray[num])); + else if (parameters[i].flags & FLAGS_SHORT) + parameters[i].data.number.as_unsigned = (trio_uintmax_t)(*((short *)argarray[num])); + else + parameters[i].data.number.as_unsigned = (trio_uintmax_t)(*((int *)argarray[num])); + } + } + } + break; + + case FORMAT_PARAMETER: + /* + * The parameter for the user-defined specifier is a pointer, + * whereas the rest (width, precision, base) uses an integer. + */ + if (parameters[i].flags & FLAGS_USER_DEFINED) + parameters[i].data.pointer = (argarray == NULL) + ? va_arg(*arglist, trio_pointer_t ) + : argarray[num]; + else + parameters[i].data.number.as_unsigned = (argarray == NULL) + ? (trio_uintmax_t)va_arg(*arglist, int) + : (trio_uintmax_t)(*((int *)argarray[num])); + break; + + case FORMAT_DOUBLE: + if (TYPE_SCAN == type) + { + if (parameters[i].flags & FLAGS_LONGDOUBLE) + parameters[i].data.longdoublePointer = (argarray == NULL) + ? va_arg(*arglist, trio_long_double_t *) + : (trio_long_double_t *)argarray[num]; + else + { + if (parameters[i].flags & FLAGS_LONG) + parameters[i].data.doublePointer = (argarray == NULL) + ? va_arg(*arglist, double *) + : (double *)argarray[num]; + else + parameters[i].data.doublePointer = (argarray == NULL) + ? (double *)va_arg(*arglist, float *) + : (double *)((float *)argarray[num]); + } + } + else + { + if (parameters[i].flags & FLAGS_LONGDOUBLE) + parameters[i].data.longdoubleNumber = (argarray == NULL) + ? 
va_arg(*arglist, trio_long_double_t) + : (trio_long_double_t)(*((trio_long_double_t *)argarray[num])); + else + { + if (argarray == NULL) + parameters[i].data.longdoubleNumber = + (trio_long_double_t)va_arg(*arglist, double); + else + { + if (parameters[i].flags & FLAGS_SHORT) + parameters[i].data.longdoubleNumber = + (trio_long_double_t)(*((float *)argarray[num])); + else + parameters[i].data.longdoubleNumber = + (trio_long_double_t)(*((double *)argarray[num])); + } + } + } + break; + +#if defined(FORMAT_ERRNO) + case FORMAT_ERRNO: + parameters[i].data.errorNumber = save_errno; + break; +#endif + + default: + break; + } + } /* for all specifiers */ + return num; +} + + +/************************************************************************* + * + * FORMATTING + * + ************************************************************************/ + + +/************************************************************************* + * TrioWriteNumber + * + * Description: + * Output a number. + * The complexity of this function is a result of the complexity + * of the dependencies of the flags. + */ +TRIO_PRIVATE void +TrioWriteNumber +TRIO_ARGS6((self, number, flags, width, precision, base), + trio_class_t *self, + trio_uintmax_t number, + unsigned long flags, + int width, + int precision, + int base) +{ + BOOLEAN_T isNegative; + char buffer[MAX_CHARS_IN(trio_uintmax_t) * (1 + MAX_LOCALE_SEPARATOR_LENGTH) + 1]; + char *bufferend; + char *pointer; + TRIO_CONST char *digits; + int i; + int length; + char *p; + int count; + + assert(VALID(self)); + assert(VALID(self->OutStream)); + assert(((base >= MIN_BASE) && (base <= MAX_BASE)) || (base == NO_BASE)); + + digits = (flags & FLAGS_UPPER) ? internalDigitsUpper : internalDigitsLower; + if (base == NO_BASE) + base = BASE_DECIMAL; + + isNegative = (flags & FLAGS_UNSIGNED) + ? 
FALSE + : ((trio_intmax_t)number < 0); + if (isNegative) + number = -((trio_intmax_t)number); + + if (flags & FLAGS_QUAD) + number &= (trio_ulonglong_t)-1; + else if (flags & FLAGS_LONG) + number &= (unsigned long)-1; + else + number &= (unsigned int)-1; + + /* Build number */ + pointer = bufferend = &buffer[sizeof(buffer) - 1]; + *pointer-- = NIL; + for (i = 1; i < (int)sizeof(buffer); i++) + { + *pointer-- = digits[number % base]; + number /= base; + if (number == 0) + break; + + if ((flags & FLAGS_QUOTE) && TrioFollowedBySeparator(i + 1)) + { + /* + * We are building the number from the least significant + * to the most significant digit, so we have to copy the + * thousand separator backwards + */ + length = internalThousandSeparatorLength; + if (((int)(pointer - buffer) - length) > 0) + { + p = &internalThousandSeparator[length - 1]; + while (length-- > 0) + *pointer-- = *p--; + } + } + } + + /* Adjust width */ + width -= (bufferend - pointer) - 1; + + /* Adjust precision */ + if (NO_PRECISION != precision) + { + precision -= (bufferend - pointer) - 1; + if (precision < 0) + precision = 0; + flags |= FLAGS_NILPADDING; + } + + /* Adjust width further */ + if (isNegative || (flags & FLAGS_SHOWSIGN) || (flags & FLAGS_SPACE)) + width--; + if (flags & FLAGS_ALTERNATIVE) + { + switch (base) + { + case BASE_BINARY: + case BASE_HEX: + width -= 2; + break; + case BASE_OCTAL: + width--; + break; + default: + break; + } + } + + /* Output prefixes spaces if needed */ + if (! ((flags & FLAGS_LEFTADJUST) || + ((flags & FLAGS_NILPADDING) && (precision == NO_PRECISION)))) + { + count = (precision == NO_PRECISION) ? 
0 : precision; + while (width-- > count) + self->OutStream(self, CHAR_ADJUST); + } + + /* width has been adjusted for signs and alternatives */ + if (isNegative) + self->OutStream(self, '-'); + else if (flags & FLAGS_SHOWSIGN) + self->OutStream(self, '+'); + else if (flags & FLAGS_SPACE) + self->OutStream(self, ' '); + + if (flags & FLAGS_ALTERNATIVE) + { + switch (base) + { + case BASE_BINARY: + self->OutStream(self, '0'); + self->OutStream(self, (flags & FLAGS_UPPER) ? 'B' : 'b'); + break; + + case BASE_OCTAL: + self->OutStream(self, '0'); + break; + + case BASE_HEX: + self->OutStream(self, '0'); + self->OutStream(self, (flags & FLAGS_UPPER) ? 'X' : 'x'); + break; + + default: + break; + } /* switch base */ + } + + /* Output prefixed zero padding if needed */ + if (flags & FLAGS_NILPADDING) + { + if (precision == NO_PRECISION) + precision = width; + while (precision-- > 0) + { + self->OutStream(self, '0'); + width--; + } + } + + /* Output the number itself */ + while (*(++pointer)) + { + self->OutStream(self, *pointer); + } + + /* Output trailing spaces if needed */ + if (flags & FLAGS_LEFTADJUST) + { + while (width-- > 0) + self->OutStream(self, CHAR_ADJUST); + } +} + +/************************************************************************* + * TrioWriteStringCharacter + * + * Description: + * Output a single character of a string + */ +TRIO_PRIVATE void +TrioWriteStringCharacter +TRIO_ARGS3((self, ch, flags), + trio_class_t *self, + int ch, + unsigned long flags) +{ + if (flags & FLAGS_ALTERNATIVE) + { + if (! isprint(ch)) + { + /* + * Non-printable characters are converted to C escapes or + * \number, if no C escape exists. 
+ */ + self->OutStream(self, CHAR_BACKSLASH); + switch (ch) + { + case '\007': self->OutStream(self, 'a'); break; + case '\b': self->OutStream(self, 'b'); break; + case '\f': self->OutStream(self, 'f'); break; + case '\n': self->OutStream(self, 'n'); break; + case '\r': self->OutStream(self, 'r'); break; + case '\t': self->OutStream(self, 't'); break; + case '\v': self->OutStream(self, 'v'); break; + case '\\': self->OutStream(self, '\\'); break; + default: + self->OutStream(self, 'x'); + TrioWriteNumber(self, (trio_uintmax_t)ch, + FLAGS_UNSIGNED | FLAGS_NILPADDING, + 2, 2, BASE_HEX); + break; + } + } + else if (ch == CHAR_BACKSLASH) + { + self->OutStream(self, CHAR_BACKSLASH); + self->OutStream(self, CHAR_BACKSLASH); + } + else + { + self->OutStream(self, ch); + } + } + else + { + self->OutStream(self, ch); + } +} + +/************************************************************************* + * TrioWriteString + * + * Description: + * Output a string + */ +TRIO_PRIVATE void +TrioWriteString +TRIO_ARGS5((self, string, flags, width, precision), + trio_class_t *self, + TRIO_CONST char *string, + unsigned long flags, + int width, + int precision) +{ + int length; + int ch; + + assert(VALID(self)); + assert(VALID(self->OutStream)); + + if (string == NULL) + { + string = internalNullString; + length = sizeof(internalNullString) - 1; + /* Disable quoting for the null pointer */ + flags &= (~FLAGS_QUOTE); + width = 0; + } + else + { + length = trio_length(string); + } + if ((NO_PRECISION != precision) && + (precision < length)) + { + length = precision; + } + width -= length; + + if (flags & FLAGS_QUOTE) + self->OutStream(self, CHAR_QUOTE); + + if (! 
(flags & FLAGS_LEFTADJUST)) + { + while (width-- > 0) + self->OutStream(self, CHAR_ADJUST); + } + + while (length-- > 0) + { + /* The ctype parameters must be an unsigned char (or EOF) */ + ch = (int)((unsigned char)(*string++)); + TrioWriteStringCharacter(self, ch, flags); + } + + if (flags & FLAGS_LEFTADJUST) + { + while (width-- > 0) + self->OutStream(self, CHAR_ADJUST); + } + if (flags & FLAGS_QUOTE) + self->OutStream(self, CHAR_QUOTE); +} + +/************************************************************************* + * TrioWriteWideStringCharacter + * + * Description: + * Output a wide string as a multi-byte sequence + */ +#if TRIO_WIDECHAR +TRIO_PRIVATE int +TrioWriteWideStringCharacter +TRIO_ARGS4((self, wch, flags, width), + trio_class_t *self, + trio_wchar_t wch, + unsigned long flags, + int width) +{ + int size; + int i; + int ch; + char *string; + char buffer[MB_LEN_MAX + 1]; + + if (width == NO_WIDTH) + width = sizeof(buffer); + + size = wctomb(buffer, wch); + if ((size <= 0) || (size > width) || (buffer[0] == NIL)) + return 0; + + string = buffer; + i = size; + while ((width >= i) && (width-- > 0) && (i-- > 0)) + { + /* The ctype parameters must be an unsigned char (or EOF) */ + ch = (int)((unsigned char)(*string++)); + TrioWriteStringCharacter(self, ch, flags); + } + return size; +} +#endif /* TRIO_WIDECHAR */ + +/************************************************************************* + * TrioWriteWideString + * + * Description: + * Output a wide character string as a multi-byte string + */ +#if TRIO_WIDECHAR +TRIO_PRIVATE void +TrioWriteWideString +TRIO_ARGS5((self, wstring, flags, width, precision), + trio_class_t *self, + TRIO_CONST trio_wchar_t *wstring, + unsigned long flags, + int width, + int precision) +{ + int length; + int size; + + assert(VALID(self)); + assert(VALID(self->OutStream)); + +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + (void)mblen(NULL, 0); +#endif + + if (wstring == NULL) + { + TrioWriteString(self, NULL, flags, width, 
precision); + return; + } + + if (NO_PRECISION == precision) + { + length = INT_MAX; + } + else + { + length = precision; + width -= length; + } + + if (flags & FLAGS_QUOTE) + self->OutStream(self, CHAR_QUOTE); + + if (! (flags & FLAGS_LEFTADJUST)) + { + while (width-- > 0) + self->OutStream(self, CHAR_ADJUST); + } + + while (length > 0) + { + size = TrioWriteWideStringCharacter(self, *wstring++, flags, length); + if (size == 0) + break; /* while */ + length -= size; + } + + if (flags & FLAGS_LEFTADJUST) + { + while (width-- > 0) + self->OutStream(self, CHAR_ADJUST); + } + if (flags & FLAGS_QUOTE) + self->OutStream(self, CHAR_QUOTE); +} +#endif /* TRIO_WIDECHAR */ + +/************************************************************************* + * TrioWriteDouble + * + * http://wwwold.dkuug.dk/JTC1/SC22/WG14/www/docs/dr_211.htm + * + * "5.2.4.2.2 paragraph #4 + * + * The accuracy [...] is implementation defined, as is the accuracy + * of the conversion between floating-point internal representations + * and string representations performed by the libray routine in + * <stdio.h>" + */ +/* FIXME: handle all instances of constant long-double number (L) + * and *l() math functions. 
+ */ +TRIO_PRIVATE void +TrioWriteDouble +TRIO_ARGS6((self, number, flags, width, precision, base), + trio_class_t *self, + trio_long_double_t number, + unsigned long flags, + int width, + int precision, + int base) +{ + trio_long_double_t integerNumber; + trio_long_double_t fractionNumber; + trio_long_double_t workNumber; + int integerDigits; + int fractionDigits; + int exponentDigits; + int baseDigits; + int integerThreshold; + int fractionThreshold; + int expectedWidth; + int exponent = 0; + unsigned int uExponent = 0; + int exponentBase; + trio_long_double_t dblBase; + trio_long_double_t dblIntegerBase; + trio_long_double_t dblFractionBase; + trio_long_double_t integerAdjust; + trio_long_double_t fractionAdjust; + BOOLEAN_T isNegative; + BOOLEAN_T isExponentNegative = FALSE; + BOOLEAN_T requireTwoDigitExponent; + BOOLEAN_T isHex; + TRIO_CONST char *digits; + char *groupingPointer; + int i; + int index; + BOOLEAN_T hasOnlyZeroes; + int zeroes = 0; + register int trailingZeroes; + BOOLEAN_T keepTrailingZeroes; + BOOLEAN_T keepDecimalPoint; + trio_long_double_t epsilon; + + assert(VALID(self)); + assert(VALID(self->OutStream)); + assert(((base >= MIN_BASE) && (base <= MAX_BASE)) || (base == NO_BASE)); + + /* Determine sign and look for special quantities */ + switch (trio_fpclassify_and_signbit(number, &isNegative)) + { + case TRIO_FP_NAN: + TrioWriteString(self, + (flags & FLAGS_UPPER) + ? NAN_UPPER + : NAN_LOWER, + flags, width, precision); + return; + + case TRIO_FP_INFINITE: + if (isNegative) + { + /* Negative infinity */ + TrioWriteString(self, + (flags & FLAGS_UPPER) + ? "-" INFINITE_UPPER + : "-" INFINITE_LOWER, + flags, width, precision); + return; + } + else + { + /* Positive infinity */ + TrioWriteString(self, + (flags & FLAGS_UPPER) + ? INFINITE_UPPER + : INFINITE_LOWER, + flags, width, precision); + return; + } + + default: + /* Finitude */ + break; + } + + /* Normal numbers */ + if (flags & FLAGS_LONGDOUBLE) + { + baseDigits = (base == 10) + ? 
LDBL_DIG + : (int)floor(LDBL_MANT_DIG / TrioLogarithmBase(base)); + epsilon = LDBL_EPSILON; + } + else if (flags & FLAGS_SHORT) + { + baseDigits = (base == BASE_DECIMAL) + ? FLT_DIG + : (int)floor(FLT_MANT_DIG / TrioLogarithmBase(base)); + epsilon = FLT_EPSILON; + } + else + { + baseDigits = (base == BASE_DECIMAL) + ? DBL_DIG + : (int)floor(DBL_MANT_DIG / TrioLogarithmBase(base)); + epsilon = DBL_EPSILON; + } + + digits = (flags & FLAGS_UPPER) ? internalDigitsUpper : internalDigitsLower; + isHex = (base == BASE_HEX); + if (base == NO_BASE) + base = BASE_DECIMAL; + dblBase = (trio_long_double_t)base; + keepTrailingZeroes = !( (flags & FLAGS_ROUNDING) || + ( (flags & FLAGS_FLOAT_G) && + !(flags & FLAGS_ALTERNATIVE) ) ); + + if (flags & FLAGS_ROUNDING) + precision = baseDigits; + + if (precision == NO_PRECISION) + precision = FLT_DIG; + + if (isNegative) + number = -number; + + if (isHex) + flags |= FLAGS_FLOAT_E; + + if (flags & FLAGS_FLOAT_G) + { + if (precision == 0) + precision = 1; + + if ((number < 1.0E-4) || (number > powl(base, + (trio_long_double_t)precision))) + { + /* Use scientific notation */ + flags |= FLAGS_FLOAT_E; + } + else if (number < 1.0) + { + /* + * Use normal notation. If the integer part of the number is + * zero, then adjust the precision to include leading fractional + * zeros. + */ + workNumber = TrioLogarithm(number, base); + workNumber = TRIO_FABS(workNumber); + if (workNumber - floorl(workNumber) < 0.001) + workNumber--; + zeroes = (int)floorl(workNumber); + } + } + + if (flags & FLAGS_FLOAT_E) + { + /* Scale the number */ + workNumber = TrioLogarithm(number, base); + if (trio_isinf(workNumber) == -1) + { + exponent = 0; + /* Undo setting */ + if (flags & FLAGS_FLOAT_G) + flags &= ~FLAGS_FLOAT_E; + } + else + { + exponent = (int)floorl(workNumber); + number /= powl(dblBase, (trio_long_double_t)exponent); + isExponentNegative = (exponent < 0); + uExponent = (isExponentNegative) ? 
-exponent : exponent; + /* No thousand separators */ + flags &= ~FLAGS_QUOTE; + } + } + + integerNumber = floorl(number); + fractionNumber = number - integerNumber; + + /* + * Truncated number. + * + * Precision is number of significant digits for FLOAT_G + * and number of fractional digits for others. + */ + integerDigits = (integerNumber > epsilon) + ? 1 + (int)TrioLogarithm(integerNumber, base) + : 1; + fractionDigits = ((flags & FLAGS_FLOAT_G) && (zeroes == 0)) + ? precision - integerDigits + : zeroes + precision; + + dblFractionBase = TrioPower(base, fractionDigits); + + workNumber = number + 0.5 / dblFractionBase; + if (floorl(number) != floorl(workNumber)) + { + if (flags & FLAGS_FLOAT_E) + { + /* Adjust if number was rounded up one digit (ie. 0.99 to 1.00) */ + exponent++; + isExponentNegative = (exponent < 0); + uExponent = (isExponentNegative) ? -exponent : exponent; + workNumber = (number + 0.5 / dblFractionBase) / dblBase; + integerNumber = floorl(workNumber); + fractionNumber = workNumber - integerNumber; + } + else + { + /* Adjust if number was rounded up one digit (ie. 99 to 100) */ + integerNumber = floorl(number + 0.5); + fractionNumber = 0.0; + integerDigits = (integerNumber > epsilon) + ? 1 + (int)TrioLogarithm(integerNumber, base) + : 1; + } + } + + /* Estimate accuracy */ + integerAdjust = fractionAdjust = 0.5; + if (flags & FLAGS_ROUNDING) + { + if (integerDigits > baseDigits) + { + integerThreshold = baseDigits; + fractionDigits = 0; + dblFractionBase = 1.0; + fractionThreshold = 0; + precision = 0; /* Disable decimal-point */ + integerAdjust = TrioPower(base, integerDigits - integerThreshold - 1); + fractionAdjust = 0.0; + } + else + { + integerThreshold = integerDigits; + fractionThreshold = fractionDigits - integerThreshold; + fractionAdjust = 1.0; + } + } + else + { + integerThreshold = INT_MAX; + fractionThreshold = INT_MAX; + } + + /* + * Calculate expected width. 
+ * sign + integer part + thousands separators + decimal point + * + fraction + exponent + */ + fractionAdjust /= dblFractionBase; + hasOnlyZeroes = (floorl((fractionNumber + fractionAdjust) * dblFractionBase) < epsilon); + keepDecimalPoint = ( (flags & FLAGS_ALTERNATIVE) || + !((precision == 0) || + (!keepTrailingZeroes && hasOnlyZeroes)) ); + if (flags & FLAGS_FLOAT_E) + { + exponentDigits = (uExponent == 0) + ? 1 + : (int)ceil(TrioLogarithm((double)(uExponent + 1), base)); + } + else + exponentDigits = 0; + requireTwoDigitExponent = ((base == BASE_DECIMAL) && (exponentDigits == 1)); + + expectedWidth = integerDigits + fractionDigits + + (keepDecimalPoint + ? internalDecimalPointLength + : 0) + + ((flags & FLAGS_QUOTE) + ? TrioCalcThousandSeparatorLength(integerDigits) + : 0); + if (isNegative || (flags & FLAGS_SHOWSIGN) || (flags & FLAGS_SPACE)) + expectedWidth += sizeof("-") - 1; + if (exponentDigits > 0) + expectedWidth += exponentDigits + + ((requireTwoDigitExponent ? sizeof("E+0") : sizeof("E+")) - 1); + if (isHex) + expectedWidth += sizeof("0X") - 1; + + /* Output prefixing */ + if (flags & FLAGS_NILPADDING) + { + /* Leading zeros must be after sign */ + if (isNegative) + self->OutStream(self, '-'); + else if (flags & FLAGS_SHOWSIGN) + self->OutStream(self, '+'); + else if (flags & FLAGS_SPACE) + self->OutStream(self, ' '); + if (isHex) + { + self->OutStream(self, '0'); + self->OutStream(self, (flags & FLAGS_UPPER) ? 
'X' : 'x'); + } + if (!(flags & FLAGS_LEFTADJUST)) + { + for (i = expectedWidth; i < width; i++) + { + self->OutStream(self, '0'); + } + } + } + else + { + /* Leading spaces must be before sign */ + if (!(flags & FLAGS_LEFTADJUST)) + { + for (i = expectedWidth; i < width; i++) + { + self->OutStream(self, CHAR_ADJUST); + } + } + if (isNegative) + self->OutStream(self, '-'); + else if (flags & FLAGS_SHOWSIGN) + self->OutStream(self, '+'); + else if (flags & FLAGS_SPACE) + self->OutStream(self, ' '); + if (isHex) + { + self->OutStream(self, '0'); + self->OutStream(self, (flags & FLAGS_UPPER) ? 'X' : 'x'); + } + } + + /* Output the integer part and thousand separators */ + dblIntegerBase = 1.0 / TrioPower(base, integerDigits - 1); + for (i = 0; i < integerDigits; i++) + { + workNumber = floorl(((integerNumber + integerAdjust) * dblIntegerBase)); + if (i > integerThreshold) + { + /* Beyond accuracy */ + self->OutStream(self, digits[0]); + } + else + { + self->OutStream(self, digits[(int)fmodl(workNumber, dblBase)]); + } + dblIntegerBase *= dblBase; + + if (((flags & (FLAGS_FLOAT_E | FLAGS_QUOTE)) == FLAGS_QUOTE) + && TrioFollowedBySeparator(integerDigits - i)) + { + for (groupingPointer = internalThousandSeparator; + *groupingPointer != NIL; + groupingPointer++) + { + self->OutStream(self, *groupingPointer); + } + } + } + + /* Insert decimal point and build the fraction part */ + trailingZeroes = 0; + + if (keepDecimalPoint) + { + if (internalDecimalPoint) + { + self->OutStream(self, internalDecimalPoint); + } + else + { + for (i = 0; i < internalDecimalPointLength; i++) + { + self->OutStream(self, internalDecimalPointString[i]); + } + } + } + + for (i = 0; i < fractionDigits; i++) + { + if ((integerDigits > integerThreshold) || (i > fractionThreshold)) + { + /* Beyond accuracy */ + trailingZeroes++; + } + else + { + fractionNumber *= dblBase; + fractionAdjust *= dblBase; + workNumber = floorl(fractionNumber + fractionAdjust); + fractionNumber -= workNumber; + index = 
(int)fmodl(workNumber, dblBase); + if (index == 0) + { + trailingZeroes++; + } + else + { + while (trailingZeroes > 0) + { + /* Not trailing zeroes after all */ + self->OutStream(self, digits[0]); + trailingZeroes--; + } + self->OutStream(self, digits[index]); + } + } + } + + if (keepTrailingZeroes) + { + while (trailingZeroes > 0) + { + self->OutStream(self, digits[0]); + trailingZeroes--; + } + } + + /* Output exponent */ + if (exponentDigits > 0) + { + self->OutStream(self, + isHex + ? ((flags & FLAGS_UPPER) ? 'P' : 'p') + : ((flags & FLAGS_UPPER) ? 'E' : 'e')); + self->OutStream(self, (isExponentNegative) ? '-' : '+'); + + /* The exponent must contain at least two digits */ + if (requireTwoDigitExponent) + self->OutStream(self, '0'); + + exponentBase = (int)TrioPower(base, exponentDigits - 1); + for (i = 0; i < exponentDigits; i++) + { + self->OutStream(self, digits[(uExponent / exponentBase) % base]); + exponentBase /= base; + } + } + /* Output trailing spaces */ + if (flags & FLAGS_LEFTADJUST) + { + for (i = expectedWidth; i < width; i++) + { + self->OutStream(self, CHAR_ADJUST); + } + } +} + +/************************************************************************* + * TrioFormatProcess + * + * Description: + * This is the main engine for formatting output + */ +TRIO_PRIVATE int +TrioFormatProcess +TRIO_ARGS3((data, format, parameters), + trio_class_t *data, + TRIO_CONST char *format, + trio_parameter_t *parameters) +{ +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + int charlen; +#endif + int i; + TRIO_CONST char *string; + trio_pointer_t pointer; + unsigned long flags; + int width; + int precision; + int base; + int index; + + index = 0; + i = 0; +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + (void)mblen(NULL, 0); +#endif + + while (format[index]) + { +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + if (! isascii(format[index])) + { + charlen = mblen(&format[index], MB_LEN_MAX); + /* + * Only valid multibyte characters are handled here. 
Invalid + * multibyte characters (charlen == -1) are handled as normal + * characters. + */ + if (charlen != -1) + { + while (charlen-- > 0) + { + data->OutStream(data, format[index++]); + } + continue; /* while characters left in formatting string */ + } + } +#endif /* TRIO_COMPILER_SUPPORTS_MULTIBYTE */ + if (CHAR_IDENTIFIER == format[index]) + { + if (CHAR_IDENTIFIER == format[index + 1]) + { + data->OutStream(data, CHAR_IDENTIFIER); + index += 2; + } + else + { + /* Skip the parameter entries */ + while (parameters[i].type == FORMAT_PARAMETER) + i++; + + flags = parameters[i].flags; + + /* Find width */ + width = parameters[i].width; + if (flags & FLAGS_WIDTH_PARAMETER) + { + /* Get width from parameter list */ + width = (int)parameters[width].data.number.as_signed; + } + + /* Find precision */ + if (flags & FLAGS_PRECISION) + { + precision = parameters[i].precision; + if (flags & FLAGS_PRECISION_PARAMETER) + { + /* Get precision from parameter list */ + precision = (int)parameters[precision].data.number.as_signed; + } + } + else + { + precision = NO_PRECISION; + } + + /* Find base */ + base = parameters[i].base; + if (flags & FLAGS_BASE_PARAMETER) + { + /* Get base from parameter list */ + base = (int)parameters[base].data.number.as_signed; + } + + switch (parameters[i].type) + { + case FORMAT_CHAR: + if (flags & FLAGS_QUOTE) + data->OutStream(data, CHAR_QUOTE); + if (! 
(flags & FLAGS_LEFTADJUST)) + { + while (--width > 0) + data->OutStream(data, CHAR_ADJUST); + } +#if TRIO_WIDECHAR + if (flags & FLAGS_WIDECHAR) + { + TrioWriteWideStringCharacter(data, + (trio_wchar_t)parameters[i].data.number.as_signed, + flags, + NO_WIDTH); + } + else +#endif + { + TrioWriteStringCharacter(data, + (int)parameters[i].data.number.as_signed, + flags); + } + + if (flags & FLAGS_LEFTADJUST) + { + while(--width > 0) + data->OutStream(data, CHAR_ADJUST); + } + if (flags & FLAGS_QUOTE) + data->OutStream(data, CHAR_QUOTE); + + break; /* FORMAT_CHAR */ + + case FORMAT_INT: + TrioWriteNumber(data, + parameters[i].data.number.as_unsigned, + flags, + width, + precision, + base); + + break; /* FORMAT_INT */ + + case FORMAT_DOUBLE: + TrioWriteDouble(data, + parameters[i].data.longdoubleNumber, + flags, + width, + precision, + base); + break; /* FORMAT_DOUBLE */ + + case FORMAT_STRING: +#if TRIO_WIDECHAR + if (flags & FLAGS_WIDECHAR) + { + TrioWriteWideString(data, + parameters[i].data.wstring, + flags, + width, + precision); + } + else +#endif + { + TrioWriteString(data, + parameters[i].data.string, + flags, + width, + precision); + } + break; /* FORMAT_STRING */ + + case FORMAT_POINTER: + { + trio_reference_t reference; + + reference.data = data; + reference.parameter = ¶meters[i]; + trio_print_pointer(&reference, parameters[i].data.pointer); + } + break; /* FORMAT_POINTER */ + + case FORMAT_COUNT: + pointer = parameters[i].data.pointer; + if (NULL != pointer) + { + /* + * C99 paragraph 7.19.6.1.8 says "the number of + * characters written to the output stream so far by + * this call", which is data->committed + */ +#if defined(QUALIFIER_SIZE_T) || defined(QUALIFIER_SIZE_T_UPPER) + if (flags & FLAGS_SIZE_T) + *(size_t *)pointer = (size_t)data->committed; + else +#endif +#if defined(QUALIFIER_PTRDIFF_T) + if (flags & FLAGS_PTRDIFF_T) + *(ptrdiff_t *)pointer = (ptrdiff_t)data->committed; + else +#endif +#if defined(QUALIFIER_INTMAX_T) + if (flags & 
FLAGS_INTMAX_T) + *(trio_intmax_t *)pointer = (trio_intmax_t)data->committed; + else +#endif + if (flags & FLAGS_QUAD) + { + *(trio_ulonglong_t *)pointer = (trio_ulonglong_t)data->committed; + } + else if (flags & FLAGS_LONG) + { + *(long int *)pointer = (long int)data->committed; + } + else if (flags & FLAGS_SHORT) + { + *(short int *)pointer = (short int)data->committed; + } + else + { + *(int *)pointer = (int)data->committed; + } + } + break; /* FORMAT_COUNT */ + + case FORMAT_PARAMETER: + break; /* FORMAT_PARAMETER */ + +#if defined(FORMAT_ERRNO) + case FORMAT_ERRNO: + string = trio_error(parameters[i].data.errorNumber); + if (string) + { + TrioWriteString(data, + string, + flags, + width, + precision); + } + else + { + data->OutStream(data, '#'); + TrioWriteNumber(data, + (trio_uintmax_t)parameters[i].data.errorNumber, + flags, + width, + precision, + BASE_DECIMAL); + } + break; /* FORMAT_ERRNO */ +#endif /* defined(FORMAT_ERRNO) */ + +#if defined(FORMAT_USER_DEFINED) + case FORMAT_USER_DEFINED: + { + trio_reference_t reference; + trio_userdef_t *def = NULL; + + if (parameters[i].user_name[0] == NIL) + { + /* Use handle */ + if ((i > 0) || + (parameters[i - 1].type == FORMAT_PARAMETER)) + def = (trio_userdef_t *)parameters[i - 1].data.pointer; + } + else + { + /* Look up namespace */ + def = TrioFindNamespace(parameters[i].user_name, NULL); + } + if (def) { + reference.data = data; + reference.parameter = ¶meters[i]; + def->callback(&reference); + } + } + break; +#endif /* defined(FORMAT_USER_DEFINED) */ + + default: + break; + } /* switch parameter type */ + + /* Prepare for next */ + index = parameters[i].indexAfterSpecifier; + i++; + } + } + else /* not identifier */ + { + data->OutStream(data, format[index++]); + } + } + return data->processed; +} + +/************************************************************************* + * TrioFormatRef + */ +TRIO_PRIVATE int +TrioFormatRef +TRIO_ARGS4((reference, format, arglist, argarray), + trio_reference_t 
*reference, + TRIO_CONST char *format, + va_list *arglist, + trio_pointer_t *argarray) +{ + int status; + trio_parameter_t parameters[MAX_PARAMETERS]; + + status = TrioParse(TYPE_PRINT, format, parameters, arglist, argarray); + if (status < 0) + return status; + + status = TrioFormatProcess(reference->data, format, parameters); + if (reference->data->error != 0) + { + status = reference->data->error; + } + return status; +} + +/************************************************************************* + * TrioFormat + */ +TRIO_PRIVATE int +TrioFormat +TRIO_ARGS6((destination, destinationSize, OutStream, format, arglist, argarray), + trio_pointer_t destination, + size_t destinationSize, + void (*OutStream) TRIO_PROTO((trio_class_t *, int)), + TRIO_CONST char *format, + va_list *arglist, + trio_pointer_t *argarray) +{ + int status; + trio_class_t data; + trio_parameter_t parameters[MAX_PARAMETERS]; + + assert(VALID(OutStream)); + assert(VALID(format)); + + memset(&data, 0, sizeof(data)); + data.OutStream = OutStream; + data.location = destination; + data.max = destinationSize; + data.error = 0; + +#if defined(USE_LOCALE) + if (NULL == internalLocaleValues) + { + TrioSetLocale(); + } +#endif + + status = TrioParse(TYPE_PRINT, format, parameters, arglist, argarray); + if (status < 0) + return status; + + status = TrioFormatProcess(&data, format, parameters); + if (data.error != 0) + { + status = data.error; + } + return status; +} + +/************************************************************************* + * TrioOutStreamFile + */ +TRIO_PRIVATE void +TrioOutStreamFile +TRIO_ARGS2((self, output), + trio_class_t *self, + int output) +{ + FILE *file; + + assert(VALID(self)); + assert(VALID(self->location)); + + file = (FILE *)self->location; + self->processed++; + if (fputc(output, file) == EOF) + { + self->error = TRIO_ERROR_RETURN(TRIO_EOF, 0); + } + else + { + self->committed++; + } +} + +/************************************************************************* + * 
TrioOutStreamFileDescriptor + */ +TRIO_PRIVATE void +TrioOutStreamFileDescriptor +TRIO_ARGS2((self, output), + trio_class_t *self, + int output) +{ + int fd; + char ch; + + assert(VALID(self)); + + fd = *((int *)self->location); + ch = (char)output; + self->processed++; + if (write(fd, &ch, sizeof(char)) == -1) + { + self->error = TRIO_ERROR_RETURN(TRIO_ERRNO, 0); + } + else + { + self->committed++; + } +} + +/************************************************************************* + * TrioOutStreamCustom + */ +TRIO_PRIVATE void +TrioOutStreamCustom +TRIO_ARGS2((self, output), + trio_class_t *self, + int output) +{ + int status; + trio_custom_t *data; + + assert(VALID(self)); + assert(VALID(self->location)); + + data = (trio_custom_t *)self->location; + if (data->stream.out) + { + status = (data->stream.out)(data->closure, output); + if (status >= 0) + { + self->committed++; + } + else + { + if (self->error == 0) + { + self->error = TRIO_ERROR_RETURN(TRIO_ECUSTOM, -status); + } + } + } + self->processed++; +} + +/************************************************************************* + * TrioOutStreamString + */ +TRIO_PRIVATE void +TrioOutStreamString +TRIO_ARGS2((self, output), + trio_class_t *self, + int output) +{ + char **buffer; + + assert(VALID(self)); + assert(VALID(self->location)); + + buffer = (char **)self->location; + **buffer = (char)output; + (*buffer)++; + self->processed++; + self->committed++; +} + +/************************************************************************* + * TrioOutStreamStringMax + */ +TRIO_PRIVATE void +TrioOutStreamStringMax +TRIO_ARGS2((self, output), + trio_class_t *self, + int output) +{ + char **buffer; + + assert(VALID(self)); + assert(VALID(self->location)); + + buffer = (char **)self->location; + + if (self->processed < self->max) + { + **buffer = (char)output; + (*buffer)++; + self->committed++; + } + self->processed++; +} + +/************************************************************************* + * 
TrioOutStreamStringDynamic + */ +TRIO_PRIVATE void +TrioOutStreamStringDynamic +TRIO_ARGS2((self, output), + trio_class_t *self, + int output) +{ + assert(VALID(self)); + assert(VALID(self->location)); + + if (self->error == 0) + { + trio_xstring_append_char((trio_string_t *)self->location, + (char)output); + self->committed++; + } + /* The processed variable must always be increased */ + self->processed++; +} + +/************************************************************************* + * + * Formatted printing functions + * + ************************************************************************/ + +#if defined(TRIO_DOCUMENTATION) +# include "doc/doc_printf.h" +#endif +/** @addtogroup Printf + @{ +*/ + +/************************************************************************* + * printf + */ + +/** + Print to standard output stream. + + @param format Formatting string. + @param ... Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_printf +TRIO_VARGS2((format, va_alist), + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioFormat(stdout, 0, TrioOutStreamFile, format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +/** + Print to standard output stream. + + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_vprintf +TRIO_ARGS2((format, args), + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(format)); + + return TrioFormat(stdout, 0, TrioOutStreamFile, format, &args, NULL); +} + +/** + Print to standard output stream. + + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. 
+ */ +TRIO_PUBLIC int +trio_printfv +TRIO_ARGS2((format, args), + TRIO_CONST char *format, + trio_pointer_t * args) +{ + assert(VALID(format)); + + return TrioFormat(stdout, 0, TrioOutStreamFile, format, NULL, args); +} + +/************************************************************************* + * fprintf + */ + +/** + Print to file. + + @param file File pointer. + @param format Formatting string. + @param ... Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_fprintf +TRIO_VARGS3((file, format, va_alist), + FILE *file, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(file)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioFormat(file, 0, TrioOutStreamFile, format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +/** + Print to file. + + @param file File pointer. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_vfprintf +TRIO_ARGS3((file, format, args), + FILE *file, + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(file)); + assert(VALID(format)); + + return TrioFormat(file, 0, TrioOutStreamFile, format, &args, NULL); +} + +/** + Print to file. + + @param file File pointer. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_fprintfv +TRIO_ARGS3((file, format, args), + FILE *file, + TRIO_CONST char *format, + trio_pointer_t * args) +{ + assert(VALID(file)); + assert(VALID(format)); + + return TrioFormat(file, 0, TrioOutStreamFile, format, NULL, args); +} + +/************************************************************************* + * dprintf + */ + +/** + Print to file descriptor. + + @param fd File descriptor. + @param format Formatting string. + @param ... Arguments. + @return Number of printed characters. 
+ */ +TRIO_PUBLIC int +trio_dprintf +TRIO_VARGS3((fd, format, va_alist), + int fd, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioFormat(&fd, 0, TrioOutStreamFileDescriptor, format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +/** + Print to file descriptor. + + @param fd File descriptor. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_vdprintf +TRIO_ARGS3((fd, format, args), + int fd, + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(format)); + + return TrioFormat(&fd, 0, TrioOutStreamFileDescriptor, format, &args, NULL); +} + +/** + Print to file descriptor. + + @param fd File descriptor. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_dprintfv +TRIO_ARGS3((fd, format, args), + int fd, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + assert(VALID(format)); + + return TrioFormat(&fd, 0, TrioOutStreamFileDescriptor, format, NULL, args); +} + +/************************************************************************* + * cprintf + */ +TRIO_PUBLIC int +trio_cprintf +TRIO_VARGS4((stream, closure, format, va_alist), + trio_outstream_t stream, + trio_pointer_t closure, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + trio_custom_t data; + + assert(VALID(stream)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + data.stream.out = stream; + data.closure = closure; + status = TrioFormat(&data, 0, TrioOutStreamCustom, format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +TRIO_PUBLIC int +trio_vcprintf +TRIO_ARGS4((stream, closure, format, args), + trio_outstream_t stream, + trio_pointer_t closure, + TRIO_CONST char *format, + va_list args) +{ + trio_custom_t data; + + assert(VALID(stream)); + assert(VALID(format)); + + 
data.stream.out = stream; + data.closure = closure; + return TrioFormat(&data, 0, TrioOutStreamCustom, format, &args, NULL); +} + +TRIO_PUBLIC int +trio_cprintfv +TRIO_ARGS4((stream, closure, format, args), + trio_outstream_t stream, + trio_pointer_t closure, + TRIO_CONST char *format, + void **args) +{ + trio_custom_t data; + + assert(VALID(stream)); + assert(VALID(format)); + + data.stream.out = stream; + data.closure = closure; + return TrioFormat(&data, 0, TrioOutStreamCustom, format, NULL, args); +} + +/************************************************************************* + * sprintf + */ + +/** + Print to string. + + @param buffer Output string. + @param format Formatting string. + @param ... Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_sprintf +TRIO_VARGS3((buffer, format, va_alist), + char *buffer, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(buffer)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioFormat(&buffer, 0, TrioOutStreamString, format, &args, NULL); + *buffer = NIL; /* Terminate with NIL character */ + TRIO_VA_END(args); + return status; +} + +/** + Print to string. + + @param buffer Output string. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_vsprintf +TRIO_ARGS3((buffer, format, args), + char *buffer, + TRIO_CONST char *format, + va_list args) +{ + int status; + + assert(VALID(buffer)); + assert(VALID(format)); + + status = TrioFormat(&buffer, 0, TrioOutStreamString, format, &args, NULL); + *buffer = NIL; + return status; +} + +/** + Print to string. + + @param buffer Output string. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. 
+ */ +TRIO_PUBLIC int +trio_sprintfv +TRIO_ARGS3((buffer, format, args), + char *buffer, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + int status; + + assert(VALID(buffer)); + assert(VALID(format)); + + status = TrioFormat(&buffer, 0, TrioOutStreamString, format, NULL, args); + *buffer = NIL; + return status; +} + +/************************************************************************* + * snprintf + */ + +/** + Print at most @p max characters to string. + + @param buffer Output string. + @param max Maximum number of characters to print. + @param format Formatting string. + @param ... Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_snprintf +TRIO_VARGS4((buffer, max, format, va_alist), + char *buffer, + size_t max, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(buffer)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioFormat(&buffer, max > 0 ? max - 1 : 0, + TrioOutStreamStringMax, format, &args, NULL); + if (max > 0) + *buffer = NIL; + TRIO_VA_END(args); + return status; +} + +/** + Print at most @p max characters to string. + + @param buffer Output string. + @param max Maximum number of characters to print. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. + */ +TRIO_PUBLIC int +trio_vsnprintf +TRIO_ARGS4((buffer, max, format, args), + char *buffer, + size_t max, + TRIO_CONST char *format, + va_list args) +{ + int status; + + assert(VALID(buffer)); + assert(VALID(format)); + + status = TrioFormat(&buffer, max > 0 ? max - 1 : 0, + TrioOutStreamStringMax, format, &args, NULL); + if (max > 0) + *buffer = NIL; + return status; +} + +/** + Print at most @p max characters to string. + + @param buffer Output string. + @param max Maximum number of characters to print. + @param format Formatting string. + @param args Arguments. + @return Number of printed characters. 
+ */ +TRIO_PUBLIC int +trio_snprintfv +TRIO_ARGS4((buffer, max, format, args), + char *buffer, + size_t max, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + int status; + + assert(VALID(buffer)); + assert(VALID(format)); + + status = TrioFormat(&buffer, max > 0 ? max - 1 : 0, + TrioOutStreamStringMax, format, NULL, args); + if (max > 0) + *buffer = NIL; + return status; +} + +/************************************************************************* + * snprintfcat + * Appends the new string to the buffer string overwriting the '\0' + * character at the end of buffer. + */ +TRIO_PUBLIC int +trio_snprintfcat +TRIO_VARGS4((buffer, max, format, va_alist), + char *buffer, + size_t max, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + size_t buf_len; + + TRIO_VA_START(args, format); + + assert(VALID(buffer)); + assert(VALID(format)); + + buf_len = trio_length(buffer); + buffer = &buffer[buf_len]; + + status = TrioFormat(&buffer, max - 1 - buf_len, + TrioOutStreamStringMax, format, &args, NULL); + TRIO_VA_END(args); + *buffer = NIL; + return status; +} + +TRIO_PUBLIC int +trio_vsnprintfcat +TRIO_ARGS4((buffer, max, format, args), + char *buffer, + size_t max, + TRIO_CONST char *format, + va_list args) +{ + int status; + size_t buf_len; + + assert(VALID(buffer)); + assert(VALID(format)); + + buf_len = trio_length(buffer); + buffer = &buffer[buf_len]; + status = TrioFormat(&buffer, max - 1 - buf_len, + TrioOutStreamStringMax, format, &args, NULL); + *buffer = NIL; + return status; +} + +/************************************************************************* + * trio_aprintf + */ + +/* Deprecated */ +TRIO_PUBLIC char * +trio_aprintf +TRIO_VARGS2((format, va_alist), + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + va_list args; + trio_string_t *info; + char *result = NULL; + + assert(VALID(format)); + + info = trio_xstring_duplicate(""); + if (info) + { + TRIO_VA_START(args, format); + (void)TrioFormat(info, 0, 
TrioOutStreamStringDynamic, + format, &args, NULL); + TRIO_VA_END(args); + + trio_string_terminate(info); + result = trio_string_extract(info); + trio_string_destroy(info); + } + return result; +} + +/* Deprecated */ +TRIO_PUBLIC char * +trio_vaprintf +TRIO_ARGS2((format, args), + TRIO_CONST char *format, + va_list args) +{ + trio_string_t *info; + char *result = NULL; + + assert(VALID(format)); + + info = trio_xstring_duplicate(""); + if (info) + { + (void)TrioFormat(info, 0, TrioOutStreamStringDynamic, + format, &args, NULL); + trio_string_terminate(info); + result = trio_string_extract(info); + trio_string_destroy(info); + } + return result; +} + +TRIO_PUBLIC int +trio_asprintf +TRIO_VARGS3((result, format, va_alist), + char **result, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + va_list args; + int status; + trio_string_t *info; + + assert(VALID(format)); + + *result = NULL; + + info = trio_xstring_duplicate(""); + if (info == NULL) + { + status = TRIO_ERROR_RETURN(TRIO_ENOMEM, 0); + } + else + { + TRIO_VA_START(args, format); + status = TrioFormat(info, 0, TrioOutStreamStringDynamic, + format, &args, NULL); + TRIO_VA_END(args); + if (status >= 0) + { + trio_string_terminate(info); + *result = trio_string_extract(info); + } + trio_string_destroy(info); + } + return status; +} + +TRIO_PUBLIC int +trio_vasprintf +TRIO_ARGS3((result, format, args), + char **result, + TRIO_CONST char *format, + va_list args) +{ + int status; + trio_string_t *info; + + assert(VALID(format)); + + *result = NULL; + + info = trio_xstring_duplicate(""); + if (info == NULL) + { + status = TRIO_ERROR_RETURN(TRIO_ENOMEM, 0); + } + else + { + status = TrioFormat(info, 0, TrioOutStreamStringDynamic, + format, &args, NULL); + if (status >= 0) + { + trio_string_terminate(info); + *result = trio_string_extract(info); + } + trio_string_destroy(info); + } + return status; +} + +/** @} End of Printf documentation module */ + 
+/************************************************************************* + * + * CALLBACK + * + ************************************************************************/ + +#if defined(TRIO_DOCUMENTATION) +# include "doc/doc_register.h" +#endif +/** + @addtogroup UserDefined + @{ +*/ + +#if TRIO_EXTENSION + +/************************************************************************* + * trio_register + */ + +/** + Register new user-defined specifier. + + @param callback + @param name + @return Handle. + */ +TRIO_PUBLIC trio_pointer_t +trio_register +TRIO_ARGS2((callback, name), + trio_callback_t callback, + TRIO_CONST char *name) +{ + trio_userdef_t *def; + trio_userdef_t *prev = NULL; + + if (callback == NULL) + return NULL; + + if (name) + { + /* Handle built-in namespaces */ + if (name[0] == ':') + { + if (trio_equal(name, ":enter")) + { + internalEnterCriticalRegion = callback; + } + else if (trio_equal(name, ":leave")) + { + internalLeaveCriticalRegion = callback; + } + return NULL; + } + + /* Bail out if namespace is too long */ + if (trio_length(name) >= MAX_USER_NAME) + return NULL; + + /* Bail out if namespace already is registered */ + def = TrioFindNamespace(name, &prev); + if (def) + return NULL; + } + + def = (trio_userdef_t *)TRIO_MALLOC(sizeof(trio_userdef_t)); + if (def) + { + if (internalEnterCriticalRegion) + (void)internalEnterCriticalRegion(NULL); + + if (name) + { + /* Link into internal list */ + if (prev == NULL) + internalUserDef = def; + else + prev->next = def; + } + /* Initialize */ + def->callback = callback; + def->name = (name == NULL) + ? NULL + : trio_duplicate(name); + def->next = NULL; + + if (internalLeaveCriticalRegion) + (void)internalLeaveCriticalRegion(NULL); + } + return (trio_pointer_t)def; +} + +/** + Unregister an existing user-defined specifier. 
+ + @param handle + */ +void +trio_unregister +TRIO_ARGS1((handle), + trio_pointer_t handle) +{ + trio_userdef_t *self = (trio_userdef_t *)handle; + trio_userdef_t *def; + trio_userdef_t *prev = NULL; + + assert(VALID(self)); + + if (self->name) + { + def = TrioFindNamespace(self->name, &prev); + if (def) + { + if (internalEnterCriticalRegion) + (void)internalEnterCriticalRegion(NULL); + + if (prev == NULL) + internalUserDef = NULL; + else + prev->next = def->next; + + if (internalLeaveCriticalRegion) + (void)internalLeaveCriticalRegion(NULL); + } + trio_destroy(self->name); + } + TRIO_FREE(self); +} + +/************************************************************************* + * trio_get_format [public] + */ +TRIO_CONST char * +trio_get_format +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ +#if defined(FORMAT_USER_DEFINED) + assert(((trio_reference_t *)ref)->parameter->type == FORMAT_USER_DEFINED); +#endif + + return (((trio_reference_t *)ref)->parameter->user_data); +} + +/************************************************************************* + * trio_get_argument [public] + */ +trio_pointer_t +trio_get_argument +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ +#if defined(FORMAT_USER_DEFINED) + assert(((trio_reference_t *)ref)->parameter->type == FORMAT_USER_DEFINED); +#endif + + return ((trio_reference_t *)ref)->parameter->data.pointer; +} + +/************************************************************************* + * trio_get_width / trio_set_width [public] + */ +int +trio_get_width +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return ((trio_reference_t *)ref)->parameter->width; +} + +void +trio_set_width +TRIO_ARGS2((ref, width), + trio_pointer_t ref, + int width) +{ + ((trio_reference_t *)ref)->parameter->width = width; +} + +/************************************************************************* + * trio_get_precision / trio_set_precision [public] + */ +int +trio_get_precision +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t 
*)ref)->parameter->precision); +} + +void +trio_set_precision +TRIO_ARGS2((ref, precision), + trio_pointer_t ref, + int precision) +{ + ((trio_reference_t *)ref)->parameter->precision = precision; +} + +/************************************************************************* + * trio_get_base / trio_set_base [public] + */ +int +trio_get_base +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->base); +} + +void +trio_set_base +TRIO_ARGS2((ref, base), + trio_pointer_t ref, + int base) +{ + ((trio_reference_t *)ref)->parameter->base = base; +} + +/************************************************************************* + * trio_get_long / trio_set_long [public] + */ +int +trio_get_long +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_LONG); +} + +void +trio_set_long +TRIO_ARGS2((ref, is_long), + trio_pointer_t ref, + int is_long) +{ + if (is_long) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_LONG; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_LONG; +} + +/************************************************************************* + * trio_get_longlong / trio_set_longlong [public] + */ +int +trio_get_longlong +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_QUAD); +} + +void +trio_set_longlong +TRIO_ARGS2((ref, is_longlong), + trio_pointer_t ref, + int is_longlong) +{ + if (is_longlong) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_QUAD; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_QUAD; +} + +/************************************************************************* + * trio_get_longdouble / trio_set_longdouble [public] + */ +int +trio_get_longdouble +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_LONGDOUBLE); +} + +void +trio_set_longdouble +TRIO_ARGS2((ref, is_longdouble), + trio_pointer_t ref, + int 
is_longdouble) +{ + if (is_longdouble) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_LONGDOUBLE; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_LONGDOUBLE; +} + +/************************************************************************* + * trio_get_short / trio_set_short [public] + */ +int +trio_get_short +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_SHORT); +} + +void +trio_set_short +TRIO_ARGS2((ref, is_short), + trio_pointer_t ref, + int is_short) +{ + if (is_short) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_SHORT; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_SHORT; +} + +/************************************************************************* + * trio_get_shortshort / trio_set_shortshort [public] + */ +int +trio_get_shortshort +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_SHORTSHORT); +} + +void +trio_set_shortshort +TRIO_ARGS2((ref, is_shortshort), + trio_pointer_t ref, + int is_shortshort) +{ + if (is_shortshort) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_SHORTSHORT; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_SHORTSHORT; +} + +/************************************************************************* + * trio_get_alternative / trio_set_alternative [public] + */ +int +trio_get_alternative +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_ALTERNATIVE); +} + +void +trio_set_alternative +TRIO_ARGS2((ref, is_alternative), + trio_pointer_t ref, + int is_alternative) +{ + if (is_alternative) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_ALTERNATIVE; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_ALTERNATIVE; +} + +/************************************************************************* + * trio_get_alignment / trio_set_alignment [public] + */ +int +trio_get_alignment 
+TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_LEFTADJUST); +} + +void +trio_set_alignment +TRIO_ARGS2((ref, is_leftaligned), + trio_pointer_t ref, + int is_leftaligned) +{ + if (is_leftaligned) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_LEFTADJUST; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_LEFTADJUST; +} + +/************************************************************************* + * trio_get_spacing /trio_set_spacing [public] + */ +int +trio_get_spacing +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_SPACE); +} + +void +trio_set_spacing +TRIO_ARGS2((ref, is_space), + trio_pointer_t ref, + int is_space) +{ + if (is_space) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_SPACE; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_SPACE; +} + +/************************************************************************* + * trio_get_sign / trio_set_sign [public] + */ +int +trio_get_sign +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_SHOWSIGN); +} + +void +trio_set_sign +TRIO_ARGS2((ref, is_sign), + trio_pointer_t ref, + int is_sign) +{ + if (is_sign) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_SHOWSIGN; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_SHOWSIGN; +} + +/************************************************************************* + * trio_get_padding / trio_set_padding [public] + */ +int +trio_get_padding +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_NILPADDING); +} + +void +trio_set_padding +TRIO_ARGS2((ref, is_padding), + trio_pointer_t ref, + int is_padding) +{ + if (is_padding) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_NILPADDING; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_NILPADDING; +} + 
+/************************************************************************* + * trio_get_quote / trio_set_quote [public] + */ +int +trio_get_quote +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_QUOTE); +} + +void +trio_set_quote +TRIO_ARGS2((ref, is_quote), + trio_pointer_t ref, + int is_quote) +{ + if (is_quote) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_QUOTE; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_QUOTE; +} + +/************************************************************************* + * trio_get_upper / trio_set_upper [public] + */ +int +trio_get_upper +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_UPPER); +} + +void +trio_set_upper +TRIO_ARGS2((ref, is_upper), + trio_pointer_t ref, + int is_upper) +{ + if (is_upper) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_UPPER; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_UPPER; +} + +/************************************************************************* + * trio_get_largest / trio_set_largest [public] + */ +#if TRIO_C99 +int +trio_get_largest +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_INTMAX_T); +} + +void +trio_set_largest +TRIO_ARGS2((ref, is_largest), + trio_pointer_t ref, + int is_largest) +{ + if (is_largest) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_INTMAX_T; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_INTMAX_T; +} +#endif + +/************************************************************************* + * trio_get_ptrdiff / trio_set_ptrdiff [public] + */ +int +trio_get_ptrdiff +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_PTRDIFF_T); +} + +void +trio_set_ptrdiff +TRIO_ARGS2((ref, is_ptrdiff), + trio_pointer_t ref, + int is_ptrdiff) +{ + if (is_ptrdiff) + ((trio_reference_t 
*)ref)->parameter->flags |= FLAGS_PTRDIFF_T; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_PTRDIFF_T; +} + +/************************************************************************* + * trio_get_size / trio_set_size [public] + */ +#if TRIO_C99 +int +trio_get_size +TRIO_ARGS1((ref), + trio_pointer_t ref) +{ + return (((trio_reference_t *)ref)->parameter->flags & FLAGS_SIZE_T); +} + +void +trio_set_size +TRIO_ARGS2((ref, is_size), + trio_pointer_t ref, + int is_size) +{ + if (is_size) + ((trio_reference_t *)ref)->parameter->flags |= FLAGS_SIZE_T; + else + ((trio_reference_t *)ref)->parameter->flags &= ~FLAGS_SIZE_T; +} +#endif + +/************************************************************************* + * trio_print_int [public] + */ +void +trio_print_int +TRIO_ARGS2((ref, number), + trio_pointer_t ref, + int number) +{ + trio_reference_t *self = (trio_reference_t *)ref; + + TrioWriteNumber(self->data, + (trio_uintmax_t)number, + self->parameter->flags, + self->parameter->width, + self->parameter->precision, + self->parameter->base); +} + +/************************************************************************* + * trio_print_uint [public] + */ +void +trio_print_uint +TRIO_ARGS2((ref, number), + trio_pointer_t ref, + unsigned int number) +{ + trio_reference_t *self = (trio_reference_t *)ref; + + TrioWriteNumber(self->data, + (trio_uintmax_t)number, + self->parameter->flags | FLAGS_UNSIGNED, + self->parameter->width, + self->parameter->precision, + self->parameter->base); +} + +/************************************************************************* + * trio_print_double [public] + */ +void +trio_print_double +TRIO_ARGS2((ref, number), + trio_pointer_t ref, + double number) +{ + trio_reference_t *self = (trio_reference_t *)ref; + + TrioWriteDouble(self->data, + number, + self->parameter->flags, + self->parameter->width, + self->parameter->precision, + self->parameter->base); +} + 
+/************************************************************************* + * trio_print_string [public] + */ +void +trio_print_string +TRIO_ARGS2((ref, string), + trio_pointer_t ref, + char *string) +{ + trio_reference_t *self = (trio_reference_t *)ref; + + TrioWriteString(self->data, + string, + self->parameter->flags, + self->parameter->width, + self->parameter->precision); +} + +/************************************************************************* + * trio_print_ref [public] + */ +int +trio_print_ref +TRIO_VARGS3((ref, format, va_alist), + trio_pointer_t ref, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list arglist; + + assert(VALID(format)); + + TRIO_VA_START(arglist, format); + status = TrioFormatRef((trio_reference_t *)ref, format, &arglist, NULL); + TRIO_VA_END(arglist); + return status; +} + +/************************************************************************* + * trio_vprint_ref [public] + */ +int +trio_vprint_ref +TRIO_ARGS3((ref, format, arglist), + trio_pointer_t ref, + TRIO_CONST char *format, + va_list arglist) +{ + assert(VALID(format)); + + return TrioFormatRef((trio_reference_t *)ref, format, &arglist, NULL); +} + +/************************************************************************* + * trio_printv_ref [public] + */ +int +trio_printv_ref +TRIO_ARGS3((ref, format, argarray), + trio_pointer_t ref, + TRIO_CONST char *format, + trio_pointer_t *argarray) +{ + assert(VALID(format)); + + return TrioFormatRef((trio_reference_t *)ref, format, NULL, argarray); +} + +#endif /* TRIO_EXTENSION */ + +/************************************************************************* + * trio_print_pointer [public] + */ +void +trio_print_pointer +TRIO_ARGS2((ref, pointer), + trio_pointer_t ref, + trio_pointer_t pointer) +{ + trio_reference_t *self = (trio_reference_t *)ref; + unsigned long flags; + trio_uintmax_t number; + + if (NULL == pointer) + { + TRIO_CONST char *string = internalNullString; + while (*string) + 
self->data->OutStream(self->data, *string++); + } + else + { + /* + * The subtraction of the null pointer is a workaround + * to avoid a compiler warning. The performance overhead + * is negligible (and likely to be removed by an + * optimizing compiler). The (char *) casting is done + * to please ANSI C++. + */ + number = (trio_uintmax_t)((char *)pointer - (char *)0); + /* Shrink to size of pointer */ + number &= (trio_uintmax_t)-1; + flags = self->parameter->flags; + flags |= (FLAGS_UNSIGNED | FLAGS_ALTERNATIVE | + FLAGS_NILPADDING); + TrioWriteNumber(self->data, + number, + flags, + POINTER_WIDTH, + NO_PRECISION, + BASE_HEX); + } +} + +/** @} End of UserDefined documentation module */ + +/************************************************************************* + * + * LOCALES + * + ************************************************************************/ + +/************************************************************************* + * trio_locale_set_decimal_point + * + * Decimal point can only be one character. The input argument is a + * string to enable multibyte characters. At most MB_LEN_MAX characters + * will be used. 
+ */ +TRIO_PUBLIC void +trio_locale_set_decimal_point +TRIO_ARGS1((decimalPoint), + char *decimalPoint) +{ +#if defined(USE_LOCALE) + if (NULL == internalLocaleValues) + { + TrioSetLocale(); + } +#endif + internalDecimalPointLength = trio_length(decimalPoint); + if (internalDecimalPointLength == 1) + { + internalDecimalPoint = *decimalPoint; + } + else + { + internalDecimalPoint = NIL; + trio_copy_max(internalDecimalPointString, + sizeof(internalDecimalPointString), + decimalPoint); + } +} + +/************************************************************************* + * trio_locale_set_thousand_separator + * + * See trio_locale_set_decimal_point + */ +TRIO_PUBLIC void +trio_locale_set_thousand_separator +TRIO_ARGS1((thousandSeparator), + char *thousandSeparator) +{ +#if defined(USE_LOCALE) + if (NULL == internalLocaleValues) + { + TrioSetLocale(); + } +#endif + trio_copy_max(internalThousandSeparator, + sizeof(internalThousandSeparator), + thousandSeparator); + internalThousandSeparatorLength = trio_length(internalThousandSeparator); +} + +/************************************************************************* + * trio_locale_set_grouping + * + * Array of bytes. Reversed order. + * + * CHAR_MAX : No further grouping + * 0 : Repeat last group for the remaining digits (not necessary + * as C strings are zero-terminated) + * n : Set current group to n + * + * Same order as the grouping attribute in LC_NUMERIC. 
+ */ +TRIO_PUBLIC void +trio_locale_set_grouping +TRIO_ARGS1((grouping), + char *grouping) +{ +#if defined(USE_LOCALE) + if (NULL == internalLocaleValues) + { + TrioSetLocale(); + } +#endif + trio_copy_max(internalGrouping, + sizeof(internalGrouping), + grouping); +} + + +/************************************************************************* + * + * SCANNING + * + ************************************************************************/ + +/************************************************************************* + * TrioSkipWhitespaces + */ +TRIO_PRIVATE int +TrioSkipWhitespaces +TRIO_ARGS1((self), + trio_class_t *self) +{ + int ch; + + ch = self->current; + while (isspace(ch)) + { + self->InStream(self, &ch); + } + return ch; +} + +/************************************************************************* + * TrioGetCollation + */ +#if TRIO_EXTENSION +TRIO_PRIVATE void +TrioGetCollation(TRIO_NOARGS) +{ + int i; + int j; + int k; + char first[2]; + char second[2]; + + /* This is computational expensive */ + first[1] = NIL; + second[1] = NIL; + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + { + k = 0; + first[0] = (char)i; + for (j = 0; j < MAX_CHARACTER_CLASS; j++) + { + second[0] = (char)j; + if (trio_equal_locale(first, second)) + internalCollationArray[i][k++] = (char)j; + } + internalCollationArray[i][k] = NIL; + } +} +#endif + +/************************************************************************* + * TrioGetCharacterClass + * + * FIXME: + * multibyte + */ +TRIO_PRIVATE int +TrioGetCharacterClass +TRIO_ARGS4((format, indexPointer, flagsPointer, characterclass), + TRIO_CONST char *format, + int *indexPointer, + unsigned long *flagsPointer, + int *characterclass) +{ + int index = *indexPointer; + int i; + char ch; + char range_begin; + char range_end; + + *flagsPointer &= ~FLAGS_EXCLUDE; + + if (format[index] == QUALIFIER_CIRCUMFLEX) + { + *flagsPointer |= FLAGS_EXCLUDE; + index++; + } + /* + * If the ungroup character is at the beginning of the scanlist, 
+ * it will be part of the class, and a second ungroup character + * must follow to end the group. + */ + if (format[index] == SPECIFIER_UNGROUP) + { + characterclass[(int)SPECIFIER_UNGROUP]++; + index++; + } + /* + * Minus is used to specify ranges. To include minus in the class, + * it must be at the beginning of the list + */ + if (format[index] == QUALIFIER_MINUS) + { + characterclass[(int)QUALIFIER_MINUS]++; + index++; + } + /* Collect characters */ + for (ch = format[index]; + (ch != SPECIFIER_UNGROUP) && (ch != NIL); + ch = format[++index]) + { + switch (ch) + { + case QUALIFIER_MINUS: /* Scanlist ranges */ + + /* + * Both C99 and UNIX98 describes ranges as implementation- + * defined. + * + * We support the following behaviour (although this may + * change as we become wiser) + * - only increasing ranges, ie. [a-b] but not [b-a] + * - transitive ranges, ie. [a-b-c] == [a-c] + * - trailing minus, ie. [a-] is interpreted as an 'a' + * and a '-' + * - duplicates (although we can easily convert these + * into errors) + */ + range_begin = format[index - 1]; + range_end = format[++index]; + if (range_end == SPECIFIER_UNGROUP) + { + /* Trailing minus is included */ + characterclass[(int)ch]++; + ch = range_end; + break; /* for */ + } + if (range_end == NIL) + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + if (range_begin > range_end) + return TRIO_ERROR_RETURN(TRIO_ERANGE, index); + + for (i = (int)range_begin; i <= (int)range_end; i++) + characterclass[i]++; + + ch = range_end; + break; + +#if TRIO_EXTENSION + + case SPECIFIER_GROUP: + + switch (format[index + 1]) + { + case QUALIFIER_DOT: /* Collating symbol */ + /* + * FIXME: This will be easier to implement when multibyte + * characters have been implemented. Until now, we ignore + * this feature. 
+ */ + for (i = index + 2; ; i++) + { + if (format[i] == NIL) + /* Error in syntax */ + return -1; + else if (format[i] == QUALIFIER_DOT) + break; /* for */ + } + if (format[++i] != SPECIFIER_UNGROUP) + return -1; + + index = i; + break; + + case QUALIFIER_EQUAL: /* Equivalence class expressions */ + { + unsigned int j; + unsigned int k; + + if (internalCollationUnconverted) + { + /* Lazy evaluation of collation array */ + TrioGetCollation(); + internalCollationUnconverted = FALSE; + } + for (i = index + 2; ; i++) + { + if (format[i] == NIL) + /* Error in syntax */ + return -1; + else if (format[i] == QUALIFIER_EQUAL) + break; /* for */ + else + { + /* Mark any equivalent character */ + k = (unsigned int)format[i]; + for (j = 0; internalCollationArray[k][j] != NIL; j++) + characterclass[(int)internalCollationArray[k][j]]++; + } + } + if (format[++i] != SPECIFIER_UNGROUP) + return -1; + + index = i; + } + break; + + case QUALIFIER_COLON: /* Character class expressions */ + + if (trio_equal_max(CLASS_ALNUM, sizeof(CLASS_ALNUM) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isalnum(i)) + characterclass[i]++; + index += sizeof(CLASS_ALNUM) - 1; + } + else if (trio_equal_max(CLASS_ALPHA, sizeof(CLASS_ALPHA) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isalpha(i)) + characterclass[i]++; + index += sizeof(CLASS_ALPHA) - 1; + } + else if (trio_equal_max(CLASS_CNTRL, sizeof(CLASS_CNTRL) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (iscntrl(i)) + characterclass[i]++; + index += sizeof(CLASS_CNTRL) - 1; + } + else if (trio_equal_max(CLASS_DIGIT, sizeof(CLASS_DIGIT) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isdigit(i)) + characterclass[i]++; + index += sizeof(CLASS_DIGIT) - 1; + } + else if (trio_equal_max(CLASS_GRAPH, sizeof(CLASS_GRAPH) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isgraph(i)) + 
characterclass[i]++; + index += sizeof(CLASS_GRAPH) - 1; + } + else if (trio_equal_max(CLASS_LOWER, sizeof(CLASS_LOWER) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (islower(i)) + characterclass[i]++; + index += sizeof(CLASS_LOWER) - 1; + } + else if (trio_equal_max(CLASS_PRINT, sizeof(CLASS_PRINT) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isprint(i)) + characterclass[i]++; + index += sizeof(CLASS_PRINT) - 1; + } + else if (trio_equal_max(CLASS_PUNCT, sizeof(CLASS_PUNCT) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (ispunct(i)) + characterclass[i]++; + index += sizeof(CLASS_PUNCT) - 1; + } + else if (trio_equal_max(CLASS_SPACE, sizeof(CLASS_SPACE) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isspace(i)) + characterclass[i]++; + index += sizeof(CLASS_SPACE) - 1; + } + else if (trio_equal_max(CLASS_UPPER, sizeof(CLASS_UPPER) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isupper(i)) + characterclass[i]++; + index += sizeof(CLASS_UPPER) - 1; + } + else if (trio_equal_max(CLASS_XDIGIT, sizeof(CLASS_XDIGIT) - 1, + &format[index])) + { + for (i = 0; i < MAX_CHARACTER_CLASS; i++) + if (isxdigit(i)) + characterclass[i]++; + index += sizeof(CLASS_XDIGIT) - 1; + } + else + { + characterclass[(int)ch]++; + } + break; + + default: + characterclass[(int)ch]++; + break; + } + break; + +#endif /* TRIO_EXTENSION */ + + default: + characterclass[(int)ch]++; + break; + } + } + return 0; +} + +/************************************************************************* + * TrioReadNumber + * + * We implement our own number conversion in preference of strtol and + * strtoul, because we must handle 'long long' and thousand separators. 
+ */ +TRIO_PRIVATE BOOLEAN_T +TrioReadNumber +TRIO_ARGS5((self, target, flags, width, base), + trio_class_t *self, + trio_uintmax_t *target, + unsigned long flags, + int width, + int base) +{ + trio_uintmax_t number = 0; + int digit; + int count; + BOOLEAN_T isNegative = FALSE; + BOOLEAN_T gotNumber = FALSE; + int j; + + assert(VALID(self)); + assert(VALID(self->InStream)); + assert((base >= MIN_BASE && base <= MAX_BASE) || (base == NO_BASE)); + + if (internalDigitsUnconverted) + { + /* Lazy evaluation of digits array */ + memset(internalDigitArray, -1, sizeof(internalDigitArray)); + for (j = 0; j < (int)sizeof(internalDigitsLower) - 1; j++) + { + internalDigitArray[(int)internalDigitsLower[j]] = j; + internalDigitArray[(int)internalDigitsUpper[j]] = j; + } + internalDigitsUnconverted = FALSE; + } + + TrioSkipWhitespaces(self); + + if (!(flags & FLAGS_UNSIGNED)) + { + /* Leading sign */ + if (self->current == '+') + { + self->InStream(self, NULL); + } + else if (self->current == '-') + { + self->InStream(self, NULL); + isNegative = TRUE; + } + } + + count = self->processed; + + if (flags & FLAGS_ALTERNATIVE) + { + switch (base) + { + case NO_BASE: + case BASE_OCTAL: + case BASE_HEX: + case BASE_BINARY: + if (self->current == '0') + { + self->InStream(self, NULL); + if (self->current) + { + if ((base == BASE_HEX) && + (toupper(self->current) == 'X')) + { + self->InStream(self, NULL); + } + else if ((base == BASE_BINARY) && + (toupper(self->current) == 'B')) + { + self->InStream(self, NULL); + } + } + } + else + return FALSE; + break; + default: + break; + } + } + + while (((width == NO_WIDTH) || (self->processed - count < width)) && + (! 
((self->current == EOF) || isspace(self->current)))) + { + if (isascii(self->current)) + { + digit = internalDigitArray[self->current]; + /* Abort if digit is not allowed in the specified base */ + if ((digit == -1) || (digit >= base)) + break; + } + else if (flags & FLAGS_QUOTE) + { + /* Compare with thousands separator */ + for (j = 0; internalThousandSeparator[j] && self->current; j++) + { + if (internalThousandSeparator[j] != self->current) + break; + + self->InStream(self, NULL); + } + if (internalThousandSeparator[j]) + break; /* Mismatch */ + else + continue; /* Match */ + } + else + break; + + number *= base; + number += digit; + gotNumber = TRUE; /* we need at least one digit */ + + self->InStream(self, NULL); + } + + /* Was anything read at all? */ + if (!gotNumber) + return FALSE; + + if (target) + *target = (isNegative) ? -((trio_intmax_t)number) : number; + return TRUE; +} + +/************************************************************************* + * TrioReadChar + */ +TRIO_PRIVATE int +TrioReadChar +TRIO_ARGS4((self, target, flags, width), + trio_class_t *self, + char *target, + unsigned long flags, + int width) +{ + int i; + char ch; + trio_uintmax_t number; + + assert(VALID(self)); + assert(VALID(self->InStream)); + + for (i = 0; + (self->current != EOF) && (i < width); + i++) + { + ch = (char)self->current; + self->InStream(self, NULL); + if ((flags & FLAGS_ALTERNATIVE) && (ch == CHAR_BACKSLASH)) + { + switch (self->current) + { + case '\\': ch = '\\'; break; + case 'a': ch = '\007'; break; + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case 'v': ch = '\v'; break; + default: + if (isdigit(self->current)) + { + /* Read octal number */ + if (!TrioReadNumber(self, &number, 0, 3, BASE_OCTAL)) + return 0; + ch = (char)number; + } + else if (toupper(self->current) == 'X') + { + /* Read hexadecimal number */ + self->InStream(self, NULL); + if 
(!TrioReadNumber(self, &number, 0, 2, BASE_HEX)) + return 0; + ch = (char)number; + } + else + { + ch = (char)self->current; + } + break; + } + } + + if (target) + target[i] = ch; + } + return i + 1; +} + +/************************************************************************* + * TrioReadString + */ +TRIO_PRIVATE BOOLEAN_T +TrioReadString +TRIO_ARGS4((self, target, flags, width), + trio_class_t *self, + char *target, + unsigned long flags, + int width) +{ + int i; + + assert(VALID(self)); + assert(VALID(self->InStream)); + + TrioSkipWhitespaces(self); + + /* + * Continue until end of string is reached, a whitespace is encountered, + * or width is exceeded + */ + for (i = 0; + ((width == NO_WIDTH) || (i < width)) && + (! ((self->current == EOF) || isspace(self->current))); + i++) + { + if (TrioReadChar(self, (target ? &target[i] : 0), flags, 1) == 0) + break; /* for */ + } + if (target) + target[i] = NIL; + return TRUE; +} + +/************************************************************************* + * TrioReadWideChar + */ +#if TRIO_WIDECHAR +TRIO_PRIVATE int +TrioReadWideChar +TRIO_ARGS4((self, target, flags, width), + trio_class_t *self, + trio_wchar_t *target, + unsigned long flags, + int width) +{ + int i; + int j; + int size; + int amount = 0; + trio_wchar_t wch; + char buffer[MB_LEN_MAX + 1]; + + assert(VALID(self)); + assert(VALID(self->InStream)); + + for (i = 0; + (self->current != EOF) && (i < width); + i++) + { + if (isascii(self->current)) + { + if (TrioReadChar(self, buffer, flags, 1) == 0) + return 0; + buffer[1] = NIL; + } + else + { + /* + * Collect a multibyte character, by enlarging buffer until + * it contains a fully legal multibyte character, or the + * buffer is full. 
+ */ + j = 0; + do + { + buffer[j++] = (char)self->current; + buffer[j] = NIL; + self->InStream(self, NULL); + } + while ((j < (int)sizeof(buffer)) && (mblen(buffer, (size_t)j) != j)); + } + if (target) + { + size = mbtowc(&wch, buffer, sizeof(buffer)); + if (size > 0) + target[i] = wch; + } + amount += size; + self->InStream(self, NULL); + } + return amount; +} +#endif /* TRIO_WIDECHAR */ + +/************************************************************************* + * TrioReadWideString + */ +#if TRIO_WIDECHAR +TRIO_PRIVATE BOOLEAN_T +TrioReadWideString +TRIO_ARGS4((self, target, flags, width), + trio_class_t *self, + trio_wchar_t *target, + unsigned long flags, + int width) +{ + int i; + int size; + + assert(VALID(self)); + assert(VALID(self->InStream)); + + TrioSkipWhitespaces(self); + +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + (void)mblen(NULL, 0); +#endif + + /* + * Continue until end of string is reached, a whitespace is encountered, + * or width is exceeded + */ + for (i = 0; + ((width == NO_WIDTH) || (i < width)) && + (! ((self->current == EOF) || isspace(self->current))); + ) + { + size = TrioReadWideChar(self, &target[i], flags, 1); + if (size == 0) + break; /* for */ + + i += size; + } + if (target) + target[i] = WCONST('\0'); + return TRUE; +} +#endif /* TRIO_WIDECHAR */ + +/************************************************************************* + * TrioReadGroup + * + * FIXME: characterclass does not work with multibyte characters + */ +TRIO_PRIVATE BOOLEAN_T +TrioReadGroup +TRIO_ARGS5((self, target, characterclass, flags, width), + trio_class_t *self, + char *target, + int *characterclass, + unsigned long flags, + int width) +{ + int ch; + int i; + + assert(VALID(self)); + assert(VALID(self->InStream)); + + ch = self->current; + for (i = 0; + ((width == NO_WIDTH) || (i < width)) && + (! 
((ch == EOF) || + (((flags & FLAGS_EXCLUDE) != 0) ^ (characterclass[ch] == 0)))); + i++) + { + if (target) + target[i] = (char)ch; + self->InStream(self, &ch); + } + + if (target) + target[i] = NIL; + return TRUE; +} + +/************************************************************************* + * TrioReadDouble + * + * FIXME: + * add long double + * handle base + */ +TRIO_PRIVATE BOOLEAN_T +TrioReadDouble +TRIO_ARGS4((self, target, flags, width), + trio_class_t *self, + trio_pointer_t target, + unsigned long flags, + int width) +{ + int ch; + char doubleString[512]; + int index = 0; + int start; + int j; + BOOLEAN_T isHex = FALSE; + + doubleString[0] = 0; + + if ((width == NO_WIDTH) || (width > (int)sizeof(doubleString) - 1)) + width = sizeof(doubleString) - 1; + + TrioSkipWhitespaces(self); + + /* + * Read entire double number from stream. trio_to_double requires + * a string as input, but InStream can be anything, so we have to + * collect all characters. + */ + ch = self->current; + if ((ch == '+') || (ch == '-')) + { + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + width--; + } + + start = index; + switch (ch) + { + case 'n': + case 'N': + /* Not-a-number */ + if (index != 0) + break; + /* FALLTHROUGH */ + case 'i': + case 'I': + /* Infinity */ + while (isalpha(ch) && (index - start < width)) + { + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + } + doubleString[index] = NIL; + + /* Case insensitive string comparison */ + if (trio_equal(&doubleString[start], INFINITE_UPPER) || + trio_equal(&doubleString[start], LONG_INFINITE_UPPER)) + { + if (flags & FLAGS_LONGDOUBLE) + { + if ((start == 1) && (doubleString[0] == '-')) + { + *((trio_long_double_t *)target) = trio_ninf(); + } + else + { + *((trio_long_double_t *)target) = trio_pinf(); + } + } + else + { + if ((start == 1) && (doubleString[0] == '-')) + { + *((double *)target) = trio_ninf(); + } + else + { + *((double *)target) = trio_pinf(); + } + } + return TRUE; + } + if 
(trio_equal(doubleString, NAN_UPPER)) + { + /* NaN must not have a preceeding + nor - */ + if (flags & FLAGS_LONGDOUBLE) + { + *((trio_long_double_t *)target) = trio_nan(); + } + else + { + *((double *)target) = trio_nan(); + } + return TRUE; + } + return FALSE; + + case '0': + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + if (toupper(ch) == 'X') + { + isHex = TRUE; + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + } + break; + + default: + break; + } + + while ((ch != EOF) && (index - start < width)) + { + /* Integer part */ + if (isHex ? isxdigit(ch) : isdigit(ch)) + { + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + } + else if (flags & FLAGS_QUOTE) + { + /* Compare with thousands separator */ + for (j = 0; internalThousandSeparator[j] && self->current; j++) + { + if (internalThousandSeparator[j] != self->current) + break; + + self->InStream(self, &ch); + } + if (internalThousandSeparator[j]) + break; /* Mismatch */ + else + continue; /* Match */ + } + else + break; /* while */ + } + if (ch == '.') + { + /* Decimal part */ + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + while ((isHex ? isxdigit(ch) : isdigit(ch)) && + (index - start < width)) + { + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + } + if (isHex ? (toupper(ch) == 'P') : (toupper(ch) == 'E')) + { + /* Exponent */ + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + if ((ch == '+') || (ch == '-')) + { + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + } + while ((isHex ? 
isxdigit(ch) : isdigit(ch)) && + (index - start < width)) + { + doubleString[index++] = (char)ch; + self->InStream(self, &ch); + } + } + } + + if ((index == start) || (*doubleString == NIL)) + return FALSE; + + doubleString[index] = 0; + + if (flags & FLAGS_LONGDOUBLE) + { + *((trio_long_double_t *)target) = trio_to_long_double(doubleString, NULL); + } + else + { + *((double *)target) = trio_to_double(doubleString, NULL); + } + return TRUE; +} + +/************************************************************************* + * TrioReadPointer + */ +TRIO_PRIVATE BOOLEAN_T +TrioReadPointer +TRIO_ARGS3((self, target, flags), + trio_class_t *self, + trio_pointer_t *target, + unsigned long flags) +{ + trio_uintmax_t number; + char buffer[sizeof(internalNullString)]; + + flags |= (FLAGS_UNSIGNED | FLAGS_ALTERNATIVE | FLAGS_NILPADDING); + + if (TrioReadNumber(self, + &number, + flags, + POINTER_WIDTH, + BASE_HEX)) + { + /* + * The strange assignment of number is a workaround for a compiler + * warning + */ + if (target) + *target = (char *)0 + number; + return TRUE; + } + else if (TrioReadString(self, + (flags & FLAGS_IGNORE) + ? 
NULL + : buffer, + 0, + sizeof(internalNullString) - 1)) + { + if (trio_equal_case(buffer, internalNullString)) + { + if (target) + *target = NULL; + return TRUE; + } + } + return FALSE; +} + +/************************************************************************* + * TrioScanProcess + */ +TRIO_PRIVATE int +TrioScanProcess +TRIO_ARGS3((data, format, parameters), + trio_class_t *data, + TRIO_CONST char *format, + trio_parameter_t *parameters) +{ +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + int charlen; + int cnt; +#endif + int assignment; + int ch; + int index; /* Index of format string */ + int i; /* Index of current parameter */ + unsigned long flags; + int width; + int base; + trio_pointer_t pointer; + + assignment = 0; + i = 0; + index = 0; + data->InStream(data, &ch); + +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + (void)mblen(NULL, 0); +#endif + + while (format[index]) + { +#if defined(TRIO_COMPILER_SUPPORTS_MULTIBYTE) + if (! isascii(format[index])) + { + charlen = mblen(&format[index], MB_LEN_MAX); + if (charlen != -1) + { + /* Compare multibyte characters in format string */ + for (cnt = 0; cnt < charlen - 1; cnt++) + { + if (ch != format[index + cnt]) + { + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + data->InStream(data, &ch); + } + continue; /* while characters left in formatting string */ + } + } +#endif /* TRIO_COMPILER_SUPPORTS_MULTIBYTE */ + + if ((EOF == ch) && (parameters[i].type != FORMAT_COUNT)) + { + return (assignment > 0) ? 
assignment : EOF; + } + + if (CHAR_IDENTIFIER == format[index]) + { + if (CHAR_IDENTIFIER == format[index + 1]) + { + /* Two % in format matches one % in input stream */ + if (CHAR_IDENTIFIER == ch) + { + data->InStream(data, &ch); + index += 2; + continue; /* while format chars left */ + } + else + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + + /* Skip the parameter entries */ + while (parameters[i].type == FORMAT_PARAMETER) + i++; + + flags = parameters[i].flags; + /* Find width */ + width = parameters[i].width; + if (flags & FLAGS_WIDTH_PARAMETER) + { + /* Get width from parameter list */ + width = (int)parameters[width].data.number.as_signed; + } + /* Find base */ + base = parameters[i].base; + if (flags & FLAGS_BASE_PARAMETER) + { + /* Get base from parameter list */ + base = (int)parameters[base].data.number.as_signed; + } + + switch (parameters[i].type) + { + case FORMAT_INT: + { + trio_uintmax_t number; + + if (0 == base) + base = BASE_DECIMAL; + + if (!TrioReadNumber(data, + &number, + flags, + width, + base)) + return assignment; + + if (!(flags & FLAGS_IGNORE)) + { + assignment++; + + pointer = parameters[i].data.pointer; +#if defined(QUALIFIER_SIZE_T) || defined(QUALIFIER_SIZE_T_UPPER) + if (flags & FLAGS_SIZE_T) + *(size_t *)pointer = (size_t)number; + else +#endif +#if defined(QUALIFIER_PTRDIFF_T) + if (flags & FLAGS_PTRDIFF_T) + *(ptrdiff_t *)pointer = (ptrdiff_t)number; + else +#endif +#if defined(QUALIFIER_INTMAX_T) + if (flags & FLAGS_INTMAX_T) + *(trio_intmax_t *)pointer = (trio_intmax_t)number; + else +#endif + if (flags & FLAGS_QUAD) + *(trio_ulonglong_t *)pointer = (trio_ulonglong_t)number; + else if (flags & FLAGS_LONG) + *(long int *)pointer = (long int)number; + else if (flags & FLAGS_SHORT) + *(short int *)pointer = (short int)number; + else + *(int *)pointer = (int)number; + } + } + break; /* FORMAT_INT */ + + case FORMAT_STRING: +#if TRIO_WIDECHAR + if (flags & FLAGS_WIDECHAR) + { + if (!TrioReadWideString(data, + (flags & 
FLAGS_IGNORE) + ? NULL + : parameters[i].data.wstring, + flags, + width)) + return assignment; + } + else +#endif + { + if (!TrioReadString(data, + (flags & FLAGS_IGNORE) + ? NULL + : parameters[i].data.string, + flags, + width)) + return assignment; + } + if (!(flags & FLAGS_IGNORE)) + assignment++; + break; /* FORMAT_STRING */ + + case FORMAT_DOUBLE: + { + trio_pointer_t pointer; + + if (flags & FLAGS_IGNORE) + { + pointer = NULL; + } + else + { + pointer = (flags & FLAGS_LONGDOUBLE) + ? (trio_pointer_t)parameters[i].data.longdoublePointer + : (trio_pointer_t)parameters[i].data.doublePointer; + } + if (!TrioReadDouble(data, pointer, flags, width)) + { + return assignment; + } + if (!(flags & FLAGS_IGNORE)) + { + assignment++; + } + break; /* FORMAT_DOUBLE */ + } + case FORMAT_GROUP: + { + int characterclass[MAX_CHARACTER_CLASS + 1]; + int rc; + + /* Skip over modifiers */ + while (format[index] != SPECIFIER_GROUP) + { + index++; + } + /* Skip over group specifier */ + index++; + + memset(characterclass, 0, sizeof(characterclass)); + rc = TrioGetCharacterClass(format, + &index, + &flags, + characterclass); + if (rc < 0) + return rc; + + if (!TrioReadGroup(data, + (flags & FLAGS_IGNORE) + ? 
NULL + : parameters[i].data.string, + characterclass, + flags, + parameters[i].width)) + return assignment; + if (!(flags & FLAGS_IGNORE)) + assignment++; + } + break; /* FORMAT_GROUP */ + + case FORMAT_COUNT: + pointer = parameters[i].data.pointer; + if (NULL != pointer) + { + int count = data->committed; + if (ch != EOF) + count--; /* a character is read, but is not consumed yet */ +#if defined(QUALIFIER_SIZE_T) || defined(QUALIFIER_SIZE_T_UPPER) + if (flags & FLAGS_SIZE_T) + *(size_t *)pointer = (size_t)count; + else +#endif +#if defined(QUALIFIER_PTRDIFF_T) + if (flags & FLAGS_PTRDIFF_T) + *(ptrdiff_t *)pointer = (ptrdiff_t)count; + else +#endif +#if defined(QUALIFIER_INTMAX_T) + if (flags & FLAGS_INTMAX_T) + *(trio_intmax_t *)pointer = (trio_intmax_t)count; + else +#endif + if (flags & FLAGS_QUAD) + { + *(trio_ulonglong_t *)pointer = (trio_ulonglong_t)count; + } + else if (flags & FLAGS_LONG) + { + *(long int *)pointer = (long int)count; + } + else if (flags & FLAGS_SHORT) + { + *(short int *)pointer = (short int)count; + } + else + { + *(int *)pointer = (int)count; + } + } + break; /* FORMAT_COUNT */ + + case FORMAT_CHAR: +#if TRIO_WIDECHAR + if (flags & FLAGS_WIDECHAR) + { + if (TrioReadWideChar(data, + (flags & FLAGS_IGNORE) + ? NULL + : parameters[i].data.wstring, + flags, + (width == NO_WIDTH) ? 1 : width) == 0) + return assignment; + } + else +#endif + { + if (TrioReadChar(data, + (flags & FLAGS_IGNORE) + ? NULL + : parameters[i].data.string, + flags, + (width == NO_WIDTH) ? 1 : width) == 0) + return assignment; + } + if (!(flags & FLAGS_IGNORE)) + assignment++; + break; /* FORMAT_CHAR */ + + case FORMAT_POINTER: + if (!TrioReadPointer(data, + (flags & FLAGS_IGNORE) + ? 
NULL + : (trio_pointer_t *)parameters[i].data.pointer, + flags)) + return assignment; + if (!(flags & FLAGS_IGNORE)) + assignment++; + break; /* FORMAT_POINTER */ + + case FORMAT_PARAMETER: + break; /* FORMAT_PARAMETER */ + + default: + return TRIO_ERROR_RETURN(TRIO_EINVAL, index); + } + ch = data->current; + index = parameters[i].indexAfterSpecifier; + i++; + } + else /* Not an % identifier */ + { + if (isspace((int)format[index])) + { + /* Whitespaces may match any amount of whitespaces */ + ch = TrioSkipWhitespaces(data); + } + else if (ch == format[index]) + { + data->InStream(data, &ch); + } + else + return assignment; + + index++; + } + } + return assignment; +} + +/************************************************************************* + * TrioScan + */ +TRIO_PRIVATE int +TrioScan +TRIO_ARGS6((source, sourceSize, InStream, format, arglist, argarray), + trio_pointer_t source, + size_t sourceSize, + void (*InStream) TRIO_PROTO((trio_class_t *, int *)), + TRIO_CONST char *format, + va_list *arglist, + trio_pointer_t *argarray) +{ + int status; + trio_parameter_t parameters[MAX_PARAMETERS]; + trio_class_t data; + + assert(VALID(InStream)); + assert(VALID(format)); + + memset(&data, 0, sizeof(data)); + data.InStream = InStream; + data.location = (trio_pointer_t)source; + data.max = sourceSize; + data.error = 0; + +#if defined(USE_LOCALE) + if (NULL == internalLocaleValues) + { + TrioSetLocale(); + } +#endif + + status = TrioParse(TYPE_SCAN, format, parameters, arglist, argarray); + if (status < 0) + return status; + + status = TrioScanProcess(&data, format, parameters); + if (data.error != 0) + { + status = data.error; + } + return status; +} + +/************************************************************************* + * TrioInStreamFile + */ +TRIO_PRIVATE void +TrioInStreamFile +TRIO_ARGS2((self, intPointer), + trio_class_t *self, + int *intPointer) +{ + FILE *file = (FILE *)self->location; + + assert(VALID(self)); + assert(VALID(file)); + + self->current = 
fgetc(file); + if (self->current == EOF) + { + self->error = (ferror(file)) + ? TRIO_ERROR_RETURN(TRIO_ERRNO, 0) + : TRIO_ERROR_RETURN(TRIO_EOF, 0); + } + else + { + self->processed++; + self->committed++; + } + + if (VALID(intPointer)) + { + *intPointer = self->current; + } +} + +/************************************************************************* + * TrioInStreamFileDescriptor + */ +TRIO_PRIVATE void +TrioInStreamFileDescriptor +TRIO_ARGS2((self, intPointer), + trio_class_t *self, + int *intPointer) +{ + int fd = *((int *)self->location); + int size; + unsigned char input; + + assert(VALID(self)); + + size = read(fd, &input, sizeof(char)); + if (size == -1) + { + self->error = TRIO_ERROR_RETURN(TRIO_ERRNO, 0); + self->current = EOF; + } + else + { + self->current = (size == 0) ? EOF : input; + } + if (self->current != EOF) + { + self->committed++; + self->processed++; + } + + if (VALID(intPointer)) + { + *intPointer = self->current; + } +} + +/************************************************************************* + * TrioInStreamCustom + */ +TRIO_PRIVATE void +TrioInStreamCustom +TRIO_ARGS2((self, intPointer), + trio_class_t *self, + int *intPointer) +{ + trio_custom_t *data; + + assert(VALID(self)); + assert(VALID(self->location)); + + data = (trio_custom_t *)self->location; + + self->current = (data->stream.in == NULL) + ? 
NIL + : (data->stream.in)(data->closure); + + if (self->current == NIL) + { + self->current = EOF; + } + else + { + self->processed++; + self->committed++; + } + + if (VALID(intPointer)) + { + *intPointer = self->current; + } +} + +/************************************************************************* + * TrioInStreamString + * + * Input callback that reads one byte from a string. self->location + * points at the caller's string pointer. The next byte becomes + * self->current; a NIL byte is reported as EOF and is not consumed, + * otherwise the string pointer is advanced and the processed and + * committed counters are incremented. The result is also stored + * through intPointer when intPointer is valid. + */ +TRIO_PRIVATE void +TrioInStreamString +TRIO_ARGS2((self, intPointer), + trio_class_t *self, + int *intPointer) +{ + unsigned char **buffer; + + assert(VALID(self)); + assert(VALID(self->location)); + + buffer = (unsigned char **)self->location; + self->current = (*buffer)[0]; + if (self->current == NIL) + { + self->current = EOF; + } + else + { + (*buffer)++; + self->processed++; + self->committed++; + } + + if (VALID(intPointer)) + { + *intPointer = self->current; + } +} + +/************************************************************************* + * + * Formatted scanning functions + * + ************************************************************************/ + +#if defined(TRIO_DOCUMENTATION) +# include "doc/doc_scanf.h" +#endif +/** @addtogroup Scanf + @{ +*/ + +/************************************************************************* + * scanf + */ + +/** + Scan characters from standard input stream. + + @param format Formatting string. + @param ... Arguments. + @return Number of input items assigned on success; otherwise EOF or a + trio error code (see trio_strerror). 
+ */ +TRIO_PUBLIC int +trio_scanf +TRIO_VARGS2((format, va_alist), + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioScan((trio_pointer_t)stdin, 0, + TrioInStreamFile, + format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +TRIO_PUBLIC int +trio_vscanf +TRIO_ARGS2((format, args), + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(format)); + + return TrioScan((trio_pointer_t)stdin, 0, + TrioInStreamFile, + format, &args, NULL); +} + +TRIO_PUBLIC int +trio_scanfv +TRIO_ARGS2((format, args), + TRIO_CONST char *format, + trio_pointer_t *args) +{ + assert(VALID(format)); + + return TrioScan((trio_pointer_t)stdin, 0, + TrioInStreamFile, + format, NULL, args); +} + +/************************************************************************* + * fscanf + */ +TRIO_PUBLIC int +trio_fscanf +TRIO_VARGS3((file, format, va_alist), + FILE *file, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(file)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioScan((trio_pointer_t)file, 0, + TrioInStreamFile, + format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +TRIO_PUBLIC int +trio_vfscanf +TRIO_ARGS3((file, format, args), + FILE *file, + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(file)); + assert(VALID(format)); + + return TrioScan((trio_pointer_t)file, 0, + TrioInStreamFile, + format, &args, NULL); +} + +TRIO_PUBLIC int +trio_fscanfv +TRIO_ARGS3((file, format, args), + FILE *file, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + assert(VALID(file)); + assert(VALID(format)); + + return TrioScan((trio_pointer_t)file, 0, + TrioInStreamFile, + format, NULL, args); +} + +/************************************************************************* + * dscanf + */ +TRIO_PUBLIC int +trio_dscanf +TRIO_VARGS3((fd, format, va_alist), + int fd, + TRIO_CONST char *format, + 
TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioScan((trio_pointer_t)&fd, 0, + TrioInStreamFileDescriptor, + format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +TRIO_PUBLIC int +trio_vdscanf +TRIO_ARGS3((fd, format, args), + int fd, + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(format)); + + return TrioScan((trio_pointer_t)&fd, 0, + TrioInStreamFileDescriptor, + format, &args, NULL); +} + +TRIO_PUBLIC int +trio_dscanfv +TRIO_ARGS3((fd, format, args), + int fd, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + assert(VALID(format)); + + return TrioScan((trio_pointer_t)&fd, 0, + TrioInStreamFileDescriptor, + format, NULL, args); +} + +/************************************************************************* + * cscanf + */ +TRIO_PUBLIC int +trio_cscanf +TRIO_VARGS4((stream, closure, format, va_alist), + trio_instream_t stream, + trio_pointer_t closure, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + trio_custom_t data; + + assert(VALID(stream)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + data.stream.in = stream; + data.closure = closure; + status = TrioScan(&data, 0, TrioInStreamCustom, format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +TRIO_PUBLIC int +trio_vcscanf +TRIO_ARGS4((stream, closure, format, args), + trio_instream_t stream, + trio_pointer_t closure, + TRIO_CONST char *format, + va_list args) +{ + trio_custom_t data; + + assert(VALID(stream)); + assert(VALID(format)); + + data.stream.in = stream; + data.closure = closure; + return TrioScan(&data, 0, TrioInStreamCustom, format, &args, NULL); +} + +TRIO_PUBLIC int +trio_cscanfv +TRIO_ARGS4((stream, closure, format, args), + trio_instream_t stream, + trio_pointer_t closure, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + trio_custom_t data; + + assert(VALID(stream)); + assert(VALID(format)); + + data.stream.in = stream; + 
data.closure = closure; + return TrioScan(&data, 0, TrioInStreamCustom, format, NULL, args); +} + +/************************************************************************* + * sscanf + */ +TRIO_PUBLIC int +trio_sscanf +TRIO_VARGS3((buffer, format, va_alist), + TRIO_CONST char *buffer, + TRIO_CONST char *format, + TRIO_VA_DECL) +{ + int status; + va_list args; + + assert(VALID(buffer)); + assert(VALID(format)); + + TRIO_VA_START(args, format); + status = TrioScan((trio_pointer_t)&buffer, 0, + TrioInStreamString, + format, &args, NULL); + TRIO_VA_END(args); + return status; +} + +TRIO_PUBLIC int +trio_vsscanf +TRIO_ARGS3((buffer, format, args), + TRIO_CONST char *buffer, + TRIO_CONST char *format, + va_list args) +{ + assert(VALID(buffer)); + assert(VALID(format)); + + return TrioScan((trio_pointer_t)&buffer, 0, + TrioInStreamString, + format, &args, NULL); +} + +TRIO_PUBLIC int +trio_sscanfv +TRIO_ARGS3((buffer, format, args), + TRIO_CONST char *buffer, + TRIO_CONST char *format, + trio_pointer_t *args) +{ + assert(VALID(buffer)); + assert(VALID(format)); + + return TrioScan((trio_pointer_t)&buffer, 0, + TrioInStreamString, + format, NULL, args); +} + +/** @} End of Scanf documentation module */ + +/************************************************************************* + * trio_strerror + */ +TRIO_PUBLIC TRIO_CONST char * +trio_strerror +TRIO_ARGS1((errorcode), + int errorcode) +{ + /* Textual versions of the error codes */ + switch (TRIO_ERROR_CODE(errorcode)) + { + case TRIO_EOF: + return "End of file"; + case TRIO_EINVAL: + return "Invalid argument"; + case TRIO_ETOOMANY: + return "Too many arguments"; + case TRIO_EDBLREF: + return "Double reference"; + case TRIO_EGAP: + return "Reference gap"; + case TRIO_ENOMEM: + return "Out of memory"; + case TRIO_ERANGE: + return "Invalid range"; + case TRIO_ECUSTOM: + return "Custom error"; + default: + return "Unknown"; + } +} diff --git a/bundle/libxml/trio.h b/bundle/libxml/trio.h new file mode 100644 index 
0000000000..770c6a6f58 --- /dev/null +++ b/bundle/libxml/trio.h @@ -0,0 +1,216 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 1998 Bjorn Reese and Daniel Stenberg. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + ************************************************************************* + * + * http://ctrio.sourceforge.net/ + * + ************************************************************************/ + +#ifndef TRIO_TRIO_H +#define TRIO_TRIO_H + +#include <stdio.h> +#include <stdlib.h> +#if defined(TRIO_COMPILER_ANCIENT) +# include <varargs.h> +#else +# include <stdarg.h> +#endif + +#if !defined(WITHOUT_TRIO) + +/* + * Use autoconf defines if present. Packages using trio must define + * HAVE_CONFIG_H as a compiler option themselves. + */ +#if defined(HAVE_CONFIG_H) +# include <config.h> +#endif + +#include "triodef.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Error codes. + * + * Remember to add a textual description to trio_strerror. 
+ */ +enum { + TRIO_EOF = 1, + TRIO_EINVAL = 2, + TRIO_ETOOMANY = 3, + TRIO_EDBLREF = 4, + TRIO_EGAP = 5, + TRIO_ENOMEM = 6, + TRIO_ERANGE = 7, + TRIO_ERRNO = 8, + TRIO_ECUSTOM = 9 +}; + +/* Error macros */ +#define TRIO_ERROR_CODE(x) ((-(x)) & 0x00FF) +#define TRIO_ERROR_POSITION(x) ((-(x)) >> 8) +#define TRIO_ERROR_NAME(x) trio_strerror(x) + +typedef int (*trio_outstream_t) TRIO_PROTO((trio_pointer_t, int)); +typedef int (*trio_instream_t) TRIO_PROTO((trio_pointer_t)); + +TRIO_CONST char *trio_strerror TRIO_PROTO((int)); + +/************************************************************************* + * Print Functions + */ + +int trio_printf TRIO_PROTO((TRIO_CONST char *format, ...)); +int trio_vprintf TRIO_PROTO((TRIO_CONST char *format, va_list args)); +int trio_printfv TRIO_PROTO((TRIO_CONST char *format, void **args)); + +int trio_fprintf TRIO_PROTO((FILE *file, TRIO_CONST char *format, ...)); +int trio_vfprintf TRIO_PROTO((FILE *file, TRIO_CONST char *format, va_list args)); +int trio_fprintfv TRIO_PROTO((FILE *file, TRIO_CONST char *format, void **args)); + +int trio_dprintf TRIO_PROTO((int fd, TRIO_CONST char *format, ...)); +int trio_vdprintf TRIO_PROTO((int fd, TRIO_CONST char *format, va_list args)); +int trio_dprintfv TRIO_PROTO((int fd, TRIO_CONST char *format, void **args)); + +int trio_cprintf TRIO_PROTO((trio_outstream_t stream, trio_pointer_t closure, + TRIO_CONST char *format, ...)); +int trio_vcprintf TRIO_PROTO((trio_outstream_t stream, trio_pointer_t closure, + TRIO_CONST char *format, va_list args)); +int trio_cprintfv TRIO_PROTO((trio_outstream_t stream, trio_pointer_t closure, + TRIO_CONST char *format, void **args)); + +int trio_sprintf TRIO_PROTO((char *buffer, TRIO_CONST char *format, ...)); +int trio_vsprintf TRIO_PROTO((char *buffer, TRIO_CONST char *format, va_list args)); +int trio_sprintfv TRIO_PROTO((char *buffer, TRIO_CONST char *format, void **args)); + +int trio_snprintf TRIO_PROTO((char *buffer, size_t max, TRIO_CONST char 
*format, ...)); +int trio_vsnprintf TRIO_PROTO((char *buffer, size_t bufferSize, TRIO_CONST char *format, + va_list args)); +int trio_snprintfv TRIO_PROTO((char *buffer, size_t bufferSize, TRIO_CONST char *format, + void **args)); + +int trio_snprintfcat TRIO_PROTO((char *buffer, size_t max, TRIO_CONST char *format, ...)); +int trio_vsnprintfcat TRIO_PROTO((char *buffer, size_t bufferSize, TRIO_CONST char *format, + va_list args)); + +char *trio_aprintf TRIO_PROTO((TRIO_CONST char *format, ...)); +char *trio_vaprintf TRIO_PROTO((TRIO_CONST char *format, va_list args)); + +int trio_asprintf TRIO_PROTO((char **ret, TRIO_CONST char *format, ...)); +int trio_vasprintf TRIO_PROTO((char **ret, TRIO_CONST char *format, va_list args)); + +/************************************************************************* + * Scan Functions + */ +int trio_scanf TRIO_PROTO((TRIO_CONST char *format, ...)); +int trio_vscanf TRIO_PROTO((TRIO_CONST char *format, va_list args)); +int trio_scanfv TRIO_PROTO((TRIO_CONST char *format, void **args)); + +int trio_fscanf TRIO_PROTO((FILE *file, TRIO_CONST char *format, ...)); +int trio_vfscanf TRIO_PROTO((FILE *file, TRIO_CONST char *format, va_list args)); +int trio_fscanfv TRIO_PROTO((FILE *file, TRIO_CONST char *format, void **args)); + +int trio_dscanf TRIO_PROTO((int fd, TRIO_CONST char *format, ...)); +int trio_vdscanf TRIO_PROTO((int fd, TRIO_CONST char *format, va_list args)); +int trio_dscanfv TRIO_PROTO((int fd, TRIO_CONST char *format, void **args)); + +int trio_cscanf TRIO_PROTO((trio_instream_t stream, trio_pointer_t closure, + TRIO_CONST char *format, ...)); +int trio_vcscanf TRIO_PROTO((trio_instream_t stream, trio_pointer_t closure, + TRIO_CONST char *format, va_list args)); +int trio_cscanfv TRIO_PROTO((trio_instream_t stream, trio_pointer_t closure, + TRIO_CONST char *format, void **args)); + +int trio_sscanf TRIO_PROTO((TRIO_CONST char *buffer, TRIO_CONST char *format, ...)); +int trio_vsscanf TRIO_PROTO((TRIO_CONST char 
*buffer, TRIO_CONST char *format, va_list args)); +int trio_sscanfv TRIO_PROTO((TRIO_CONST char *buffer, TRIO_CONST char *format, void **args)); + +/************************************************************************* + * Locale Functions + */ +void trio_locale_set_decimal_point TRIO_PROTO((char *decimalPoint)); +void trio_locale_set_thousand_separator TRIO_PROTO((char *thousandSeparator)); +void trio_locale_set_grouping TRIO_PROTO((char *grouping)); + +/************************************************************************* + * Renaming + */ +#ifdef TRIO_REPLACE_STDIO +/* Replace the <stdio.h> functions */ +#ifndef HAVE_PRINTF +# define printf trio_printf +#endif +#ifndef HAVE_VPRINTF +# define vprintf trio_vprintf +#endif +#ifndef HAVE_FPRINTF +# define fprintf trio_fprintf +#endif +#ifndef HAVE_VFPRINTF +# define vfprintf trio_vfprintf +#endif +#ifndef HAVE_SPRINTF +# define sprintf trio_sprintf +#endif +#ifndef HAVE_VSPRINTF +# define vsprintf trio_vsprintf +#endif +#ifndef HAVE_SNPRINTF +# define snprintf trio_snprintf +#endif +#ifndef HAVE_VSNPRINTF +# define vsnprintf trio_vsnprintf +#endif +#ifndef HAVE_SCANF +# define scanf trio_scanf +#endif +#ifndef HAVE_VSCANF +# define vscanf trio_vscanf +#endif +#ifndef HAVE_FSCANF +# define fscanf trio_fscanf +#endif +#ifndef HAVE_VFSCANF +# define vfscanf trio_vfscanf +#endif +#ifndef HAVE_SSCANF +# define sscanf trio_sscanf +#endif +#ifndef HAVE_VSSCANF +# define vsscanf trio_vsscanf +#endif +/* These aren't stdio functions, but we make them look similar */ +#define dprintf trio_dprintf +#define vdprintf trio_vdprintf +#define aprintf trio_aprintf +#define vaprintf trio_vaprintf +#define asprintf trio_asprintf +#define vasprintf trio_vasprintf +#define dscanf trio_dscanf +#define vdscanf trio_vdscanf +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* WITHOUT_TRIO */ + +#endif /* TRIO_TRIO_H */ diff --git a/bundle/libxml/triodef.h b/bundle/libxml/triodef.h new file mode 100644 index 
0000000000..73d64311a7 --- /dev/null +++ b/bundle/libxml/triodef.h @@ -0,0 +1,182 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 2001 Bjorn Reese <breese@users.sourceforge.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + ************************************************************************/ + +#ifndef TRIO_TRIODEF_H +#define TRIO_TRIODEF_H + +/************************************************************************* + * Platform and compiler support detection + */ +#if defined(__GNUC__) +# define TRIO_COMPILER_GCC +#elif defined(__SUNPRO_C) +# define TRIO_COMPILER_SUNPRO +#elif defined(__SUNPRO_CC) +# define TRIO_COMPILER_SUNPRO +# define __SUNPRO_C __SUNPRO_CC +#elif defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) +# define TRIO_COMPILER_XLC +#elif defined(_AIX) && !defined(__GNUC__) +# define TRIO_COMPILER_XLC /* Workaround for old xlc */ +#elif defined(__DECC) || defined(__DECCXX) +# define TRIO_COMPILER_DECC +#elif defined(__osf__) && defined(__LANGUAGE_C__) +# define TRIO_COMPILER_DECC /* Workaround for old DEC C compilers */ +#elif defined(_MSC_VER) +# define TRIO_COMPILER_MSVC +#elif defined(__BORLANDC__) +# define TRIO_COMPILER_BCB +#endif + +#if defined(unix) || defined(__unix) || defined(__unix__) +# define TRIO_PLATFORM_UNIX +#elif defined(TRIO_COMPILER_XLC) || defined(_AIX) +# define TRIO_PLATFORM_UNIX +#elif ( defined(TRIO_COMPILER_DECC) && !defined(__VMS) ) || defined(__osf__) +# define 
TRIO_PLATFORM_UNIX +#elif defined(__NetBSD__) +# define TRIO_PLATFORM_UNIX +#elif defined(__QNX__) +# define TRIO_PLATFORM_UNIX +# define TRIO_PLATFORM_QNX +#elif defined(__CYGWIN__) +# define TRIO_PLATFORM_UNIX +#elif defined(AMIGA) && defined(TRIO_COMPILER_GCC) +# define TRIO_PLATFORM_UNIX +#elif defined(TRIO_COMPILER_MSVC) || defined(WIN32) || defined(_WIN32) +# define TRIO_PLATFORM_WIN32 +#elif defined(VMS) || defined(__VMS) +# define TRIO_PLATFORM_VMS +#elif defined(mpeix) || defined(__mpexl) +# define TRIO_PLATFORM_MPEIX +#endif + +#if defined(__STDC__) || defined(TRIO_COMPILER_MSVC) +# define TRIO_COMPILER_SUPPORTS_C89 +# if defined(__STDC_VERSION__) +# define TRIO_COMPILER_SUPPORTS_C90 +# if (__STDC_VERSION__ >= 199409L) +# define TRIO_COMPILER_SUPPORTS_C94 +# endif +# if (__STDC_VERSION__ >= 199901L) +# define TRIO_COMPILER_SUPPORTS_C99 +# endif +# elif defined(TRIO_COMPILER_SUNPRO) +# if (__SUNPRO_C >= 0x420) +# define TRIO_COMPILER_SUPPORTS_C94 +# endif +# endif +#endif + +#if defined(TRIO_PLATFORM_VMS) + /* The compiler does support C99 but the library still does not have things + * the standard requires (like nan() and strtof()) as of __CRTL_VER 70300022. + */ +# undef TRIO_COMPILER_SUPPORTS_C99 + + /* Computations done with constants at compile time can trigger these + * even when compiling with IEEE enabled. 
+ */ +# pragma message disable (UNDERFLOW,FLOATOVERFL) +#endif /* TRIO_PLATFORM_VMS */ + +#if defined(_XOPEN_SOURCE) +# if defined(_XOPEN_SOURCE_EXTENDED) +# define TRIO_COMPILER_SUPPORTS_UNIX95 +# endif +# if (_XOPEN_VERSION >= 500) +# define TRIO_COMPILER_SUPPORTS_UNIX98 +# endif +# if (_XOPEN_VERSION >= 600) +# define TRIO_COMPILER_SUPPORTS_UNIX01 +# endif +#endif + +/************************************************************************* + * Generic defines + */ + +#if !defined(TRIO_PUBLIC) +# define TRIO_PUBLIC +#endif +#if !defined(TRIO_PRIVATE) +# define TRIO_PRIVATE static +#endif + +#if !(defined(TRIO_COMPILER_SUPPORTS_C89) || defined(__cplusplus)) +# define TRIO_COMPILER_ANCIENT +#endif + +#if defined(TRIO_COMPILER_ANCIENT) +# define TRIO_CONST +# define TRIO_VOLATILE +# define TRIO_SIGNED +typedef double trio_long_double_t; +typedef char * trio_pointer_t; +# define TRIO_SUFFIX_LONG(x) x +# define TRIO_PROTO(x) () +# define TRIO_NOARGS +# define TRIO_ARGS1(list,a1) list a1; +# define TRIO_ARGS2(list,a1,a2) list a1; a2; +# define TRIO_ARGS3(list,a1,a2,a3) list a1; a2; a3; +# define TRIO_ARGS4(list,a1,a2,a3,a4) list a1; a2; a3; a4; +# define TRIO_ARGS5(list,a1,a2,a3,a4,a5) list a1; a2; a3; a4; a5; +# define TRIO_ARGS6(list,a1,a2,a3,a4,a5,a6) list a1; a2; a3; a4; a5; a6; +# define TRIO_VARGS2(list,a1,a2) list a1; a2 +# define TRIO_VARGS3(list,a1,a2,a3) list a1; a2; a3 +# define TRIO_VARGS4(list,a1,a2,a3,a4) list a1; a2; a3; a4 +# define TRIO_VARGS5(list,a1,a2,a3,a4,a5) list a1; a2; a3; a4; a5 +# define TRIO_VA_DECL va_dcl +# define TRIO_VA_START(x,y) va_start((x)) +# define TRIO_VA_END(x) va_end(x) +#else /* ANSI C */ +# define TRIO_CONST const +# define TRIO_VOLATILE volatile +# define TRIO_SIGNED signed +typedef long double trio_long_double_t; +typedef void * trio_pointer_t; +# define TRIO_SUFFIX_LONG(x) x ## L +# define TRIO_PROTO(x) x +# define TRIO_NOARGS void +# define TRIO_ARGS1(list,a1) (a1) +# define TRIO_ARGS2(list,a1,a2) (a1,a2) +# define 
TRIO_ARGS3(list,a1,a2,a3) (a1,a2,a3) +# define TRIO_ARGS4(list,a1,a2,a3,a4) (a1,a2,a3,a4) +# define TRIO_ARGS5(list,a1,a2,a3,a4,a5) (a1,a2,a3,a4,a5) +# define TRIO_ARGS6(list,a1,a2,a3,a4,a5,a6) (a1,a2,a3,a4,a5,a6) +# define TRIO_VARGS2 TRIO_ARGS2 +# define TRIO_VARGS3 TRIO_ARGS3 +# define TRIO_VARGS4 TRIO_ARGS4 +# define TRIO_VARGS5 TRIO_ARGS5 +# define TRIO_VA_DECL ... +# define TRIO_VA_START(x,y) va_start((x),(y)) +# define TRIO_VA_END(x) va_end(x) +#endif + +#if defined(TRIO_COMPILER_SUPPORTS_C99) || defined(__cplusplus) +# define TRIO_INLINE inline +#elif defined(TRIO_COMPILER_GCC) +# define TRIO_INLINE __inline__ +#elif defined(TRIO_COMPILER_MSVC) +# define TRIO_INLINE _inline +#elif defined(TRIO_COMPILER_BCB) +# define TRIO_INLINE __inline +#else +# define TRIO_INLINE +#endif + +#endif /* TRIO_TRIODEF_H */ diff --git a/bundle/libxml/trionan.c b/bundle/libxml/trionan.c new file mode 100644 index 0000000000..3a65986b85 --- /dev/null +++ b/bundle/libxml/trionan.c @@ -0,0 +1,913 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 2001 Bjorn Reese <breese@users.sourceforge.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + ************************************************************************ + * + * Functions to handle special quantities in floating-point numbers + * (that is, NaNs and infinity). They provide the capability to detect + * and fabricate special quantities. 
+ * + * Although written to be as portable as possible, it can never be + * guaranteed to work on all platforms, as not all hardware supports + * special quantities. + * + * The approach used here (approximately) is to: + * + * 1. Use C99 functionality when available. + * 2. Use IEEE 754 bit-patterns if possible. + * 3. Use platform-specific techniques. + * + ************************************************************************/ + +/* + * TODO: + * o Put all the magic into trio_fpclassify_and_signbit(), and use this from + * trio_isnan() etc. + */ + +/************************************************************************* + * Include files + */ +#include "triodef.h" +#include "trionan.h" + +#include <math.h> +#include <string.h> +#include <limits.h> +#include <float.h> +#if defined(TRIO_PLATFORM_UNIX) +# include <signal.h> +#endif +#if defined(TRIO_COMPILER_DECC) +# include <fp_class.h> +#endif +#include <assert.h> + +#if defined(TRIO_DOCUMENTATION) +# include "doc/doc_nan.h" +#endif +/** @addtogroup SpecialQuantities + @{ +*/ + +/************************************************************************* + * Definitions + */ + +#define TRIO_TRUE (1 == 1) +#define TRIO_FALSE (0 == 1) + +/* We must enable IEEE floating-point on Alpha */ +#if defined(__alpha) && !defined(_IEEE_FP) +# if defined(TRIO_COMPILER_DECC) +# if defined(TRIO_PLATFORM_VMS) +# error "Must be compiled with option /IEEE_MODE=UNDERFLOW_TO_ZERO/FLOAT=IEEE" +# else +# if !defined(_CFE) +# error "Must be compiled with option -ieee" +# endif +# endif +# elif defined(TRIO_COMPILER_GCC) && (defined(__osf__) || defined(__linux__)) +# error "Must be compiled with option -mieee" +# endif +#endif /* __alpha && ! _IEEE_FP */ + +/* + * In ANSI/IEEE 754-1985 64-bits double format numbers have the + * following properties (amoungst others) + * + * o FLT_RADIX == 2: binary encoding + * o DBL_MAX_EXP == 1024: 11 bits exponent, where one bit is used + * to indicate special numbers (e.g. 
NaN and Infinity), so the + * maximum exponent is 10 bits wide (2^10 == 1024). + * o DBL_MANT_DIG == 53: The mantissa is 52 bits wide, but because + * numbers are normalized the initial binary 1 is represented + * implicitly (the so-called "hidden bit"), which leaves us with + * the ability to represent 53 bits wide mantissa. + */ +#if (FLT_RADIX == 2) && (DBL_MAX_EXP == 1024) && (DBL_MANT_DIG == 53) +# define USE_IEEE_754 +#endif + + +/************************************************************************* + * Constants + */ + +static TRIO_CONST char rcsid[] = "@(#)$Id$"; + +#if defined(USE_IEEE_754) + +/* + * Endian-agnostic indexing macro. + * + * The value of internalEndianMagic, when converted into a 64-bit + * integer, becomes 0x0706050403020100 (we could have used a 64-bit + * integer value instead of a double, but not all platforms support + * that type). The value is automatically encoded with the correct + * endianness by the compiler, which means that we can support any + * kind of endianness. The individual bytes are then used as an index + * for the IEEE 754 bit-patterns and masks.
+ */ +#define TRIO_DOUBLE_INDEX(x) (((unsigned char *)&internalEndianMagic)[7-(x)]) + +static TRIO_CONST double internalEndianMagic = 7.949928895127363e-275; + +/* Mask for the exponent */ +static TRIO_CONST unsigned char ieee_754_exponent_mask[] = { + 0x7F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* Mask for the mantissa */ +static TRIO_CONST unsigned char ieee_754_mantissa_mask[] = { + 0x00, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/* Mask for the sign bit */ +static TRIO_CONST unsigned char ieee_754_sign_mask[] = { + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* Bit-pattern for negative zero */ +static TRIO_CONST unsigned char ieee_754_negzero_array[] = { + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* Bit-pattern for infinity */ +static TRIO_CONST unsigned char ieee_754_infinity_array[] = { + 0x7F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* Bit-pattern for quiet NaN */ +static TRIO_CONST unsigned char ieee_754_qnan_array[] = { + 0x7F, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + + +/************************************************************************* + * Functions + */ + +/* + * trio_make_double + */ +TRIO_PRIVATE double +trio_make_double +TRIO_ARGS1((values), + TRIO_CONST unsigned char *values) +{ + TRIO_VOLATILE double result; + int i; + + for (i = 0; i < (int)sizeof(double); i++) { + ((TRIO_VOLATILE unsigned char *)&result)[TRIO_DOUBLE_INDEX(i)] = values[i]; + } + return result; +} + +/* + * trio_is_special_quantity + */ +TRIO_PRIVATE int +trio_is_special_quantity +TRIO_ARGS2((number, has_mantissa), + double number, + int *has_mantissa) +{ + unsigned int i; + unsigned char current; + int is_special_quantity = TRIO_TRUE; + + *has_mantissa = 0; + + for (i = 0; i < (unsigned int)sizeof(double); i++) { + current = ((unsigned char *)&number)[TRIO_DOUBLE_INDEX(i)]; + is_special_quantity + &= ((current & ieee_754_exponent_mask[i]) == ieee_754_exponent_mask[i]); + *has_mantissa |= (current & 
ieee_754_mantissa_mask[i]); + } + return is_special_quantity; +} + +/* + * trio_is_negative + */ +TRIO_PRIVATE int +trio_is_negative +TRIO_ARGS1((number), + double number) +{ + unsigned int i; + int is_negative = TRIO_FALSE; + + for (i = 0; i < (unsigned int)sizeof(double); i++) { + is_negative |= (((unsigned char *)&number)[TRIO_DOUBLE_INDEX(i)] + & ieee_754_sign_mask[i]); + } + return is_negative; +} + +#endif /* USE_IEEE_754 */ + + +/** + Generate negative zero. + + @return Floating-point representation of negative zero. +*/ +TRIO_PUBLIC double +trio_nzero(TRIO_NOARGS) +{ +#if defined(USE_IEEE_754) + return trio_make_double(ieee_754_negzero_array); +#else + TRIO_VOLATILE double zero = 0.0; + + return -zero; +#endif +} + +/** + Generate positive infinity. + + @return Floating-point representation of positive infinity. +*/ +TRIO_PUBLIC double +trio_pinf(TRIO_NOARGS) +{ + /* Cache the result */ + static double result = 0.0; + + if (result == 0.0) { + +#if defined(INFINITY) && defined(__STDC_IEC_559__) + result = (double)INFINITY; + +#elif defined(USE_IEEE_754) + result = trio_make_double(ieee_754_infinity_array); + +#else + /* + * If HUGE_VAL is different from DBL_MAX, then HUGE_VAL is used + * as infinity. Otherwise we have to resort to an overflow + * operation to generate infinity. + */ +# if defined(TRIO_PLATFORM_UNIX) + void (*signal_handler)(int) = signal(SIGFPE, SIG_IGN); +# endif + + result = HUGE_VAL; + if (HUGE_VAL == DBL_MAX) { + /* Force overflow */ + result += HUGE_VAL; + } + +# if defined(TRIO_PLATFORM_UNIX) + signal(SIGFPE, signal_handler); +# endif + +#endif + } + return result; +} + +/** + Generate negative infinity. + + @return Floating-point value of negative infinity. 
+*/ +TRIO_PUBLIC double +trio_ninf(TRIO_NOARGS) +{ + static double result = 0.0; + + if (result == 0.0) { + /* + * Negative infinity is calculated by negating positive infinity, + * which can be done because it is legal to do calculations on + * infinity (for example, 1 / infinity == 0). + */ + result = -trio_pinf(); + } + return result; +} + +/** + Generate NaN. + + @return Floating-point representation of NaN. +*/ +TRIO_PUBLIC double +trio_nan(TRIO_NOARGS) +{ + /* Cache the result */ + static double result = 0.0; + + if (result == 0.0) { + +#if defined(TRIO_COMPILER_SUPPORTS_C99) + result = nan(""); + +#elif defined(NAN) && defined(__STDC_IEC_559__) + result = (double)NAN; + +#elif defined(USE_IEEE_754) + result = trio_make_double(ieee_754_qnan_array); + +#else + /* + * There are several ways to generate NaN. The one used here is + * to divide infinity by infinity. I would have preferred to add + * negative infinity to positive infinity, but that yields wrong + * result (infinity) on FreeBSD. + * + * This may fail if the hardware does not support NaN, or if + * the Invalid Operation floating-point exception is unmasked. + */ +# if defined(TRIO_PLATFORM_UNIX) + void (*signal_handler)(int) = signal(SIGFPE, SIG_IGN); +# endif + + result = trio_pinf() / trio_pinf(); + +# if defined(TRIO_PLATFORM_UNIX) + signal(SIGFPE, signal_handler); +# endif + +#endif + } + return result; +} + +/** + Check for NaN. + + @param number An arbitrary floating-point number. + @return Boolean value indicating whether or not the number is a NaN. +*/ +TRIO_PUBLIC int +trio_isnan +TRIO_ARGS1((number), + double number) +{ +#if (defined(TRIO_COMPILER_SUPPORTS_C99) && defined(isnan)) \ + || defined(TRIO_COMPILER_SUPPORTS_UNIX95) + /* + * C99 defines isnan() as a macro. UNIX95 defines isnan() as a + * function. This function was already present in XPG4, but this + * is a bit tricky to detect with compiler defines, so we choose + * the conservative approach and only use it for UNIX95. 
+ */ + return isnan(number); + +#elif defined(TRIO_COMPILER_MSVC) + /* + * MSVC has an _isnan() function + */ + return _isnan(number); + +#elif defined(USE_IEEE_754) + /* + * Examine IEEE 754 bit-pattern. A NaN must have a special exponent + * pattern, and a non-empty mantissa. + */ + int has_mantissa; + int is_special_quantity; + + is_special_quantity = trio_is_special_quantity(number, &has_mantissa); + + return (is_special_quantity && has_mantissa); + +#else + /* + * Fallback solution + */ + int status; + double integral, fraction; + +# if defined(TRIO_PLATFORM_UNIX) + void (*signal_handler)(int) = signal(SIGFPE, SIG_IGN); +# endif + + status = (/* + * NaN is the only number which does not compare to itself + */ + ((TRIO_VOLATILE double)number != (TRIO_VOLATILE double)number) || + /* + * Fallback solution if NaN compares to NaN + */ + ((number != 0.0) && + (fraction = modf(number, &integral), + integral == fraction))); + +# if defined(TRIO_PLATFORM_UNIX) + signal(SIGFPE, signal_handler); +# endif + + return status; + +#endif +} + +/** + Check for infinity. + + @param number An arbitrary floating-point number. + @return 1 if positive infinity, -1 if negative infinity, 0 otherwise. +*/ +TRIO_PUBLIC int +trio_isinf +TRIO_ARGS1((number), + double number) +{ +#if defined(TRIO_COMPILER_DECC) + /* + * DECC has an isinf() macro, but it works differently than that + * of C99, so we use the fp_class() function instead. + */ + return ((fp_class(number) == FP_POS_INF) + ? 1 + : ((fp_class(number) == FP_NEG_INF) ? -1 : 0)); + +#elif defined(isinf) + /* + * C99 defines isinf() as a macro. + */ + return isinf(number) + ? ((number > 0.0) ? 1 : -1) + : 0; + +#elif defined(TRIO_COMPILER_MSVC) + /* + * MSVC has an _fpclass() function that can be used to detect infinity. + */ + return ((_fpclass(number) == _FPCLASS_PINF) + ? 1 + : ((_fpclass(number) == _FPCLASS_NINF) ? -1 : 0)); + +#elif defined(USE_IEEE_754) + /* + * Examine IEEE 754 bit-pattern. 
Infinity must have a special exponent + * pattern, and an empty mantissa. + */ + int has_mantissa; + int is_special_quantity; + + is_special_quantity = trio_is_special_quantity(number, &has_mantissa); + + return (is_special_quantity && !has_mantissa) + ? ((number < 0.0) ? -1 : 1) + : 0; + +#else + /* + * Fallback solution. + */ + int status; + +# if defined(TRIO_PLATFORM_UNIX) + void (*signal_handler)(int) = signal(SIGFPE, SIG_IGN); +# endif + + double infinity = trio_pinf(); + + status = ((number == infinity) + ? 1 + : ((number == -infinity) ? -1 : 0)); + +# if defined(TRIO_PLATFORM_UNIX) + signal(SIGFPE, signal_handler); +# endif + + return status; + +#endif +} + + +/** + Check for finity. + + @param number An arbitrary floating-point number. + @return Boolean value indicating whether or not the number is a finite. +*/ +TRIO_PUBLIC int +trio_isfinite +TRIO_ARGS1((number), + double number) +{ +#if defined(TRIO_COMPILER_SUPPORTS_C99) && defined(isfinite) + /* + * C99 defines isfinite() as a macro. + */ + return isfinite(number); + +#elif defined(TRIO_COMPILER_MSVC) + /* + * MSVC uses _finite(). + */ + return _finite(number); + +#elif defined(USE_IEEE_754) + /* + * Examine IEEE 754 bit-pattern. For finity we do not care about the + * mantissa. + */ + int dummy; + + return (! trio_is_special_quantity(number, &dummy)); + +#else + /* + * Fallback solution. 
+ */ + return ((trio_isinf(number) == 0) && (trio_isnan(number) == 0)); + +#endif +} + +/* + * The sign of NaN is always false + */ +TRIO_PUBLIC int +trio_fpclassify_and_signbit +TRIO_ARGS2((number, is_negative), + double number, + int *is_negative) +{ +#if defined(fpclassify) && defined(signbit) + /* + * C99 defines fpclassify() and signbit() as a macros + */ + *is_negative = signbit(number); + switch (fpclassify(number)) { + case FP_NAN: + return TRIO_FP_NAN; + case FP_INFINITE: + return TRIO_FP_INFINITE; + case FP_SUBNORMAL: + return TRIO_FP_SUBNORMAL; + case FP_ZERO: + return TRIO_FP_ZERO; + default: + return TRIO_FP_NORMAL; + } + +#elif defined(TRIO_COMPILER_DECC) + /* + * DECC has an fp_class() function. + */ + switch (fp_class(number)) { + case FP_QNAN: + case FP_SNAN: + *is_negative = TRIO_FALSE; /* NaN has no sign */ + return TRIO_FP_NAN; + case FP_POS_INF: + *is_negative = TRIO_FALSE; + return TRIO_FP_INFINITE; + case FP_NEG_INF: + *is_negative = TRIO_TRUE; + return TRIO_FP_INFINITE; + case FP_POS_DENORM: + *is_negative = TRIO_FALSE; + return TRIO_FP_SUBNORMAL; + case FP_NEG_DENORM: + *is_negative = TRIO_TRUE; + return TRIO_FP_SUBNORMAL; + case FP_POS_ZERO: + *is_negative = TRIO_FALSE; + return TRIO_FP_ZERO; + case FP_NEG_ZERO: + *is_negative = TRIO_TRUE; + return TRIO_FP_ZERO; + case FP_POS_NORM: + *is_negative = TRIO_FALSE; + return TRIO_FP_NORMAL; + case FP_NEG_NORM: + *is_negative = TRIO_TRUE; + return TRIO_FP_NORMAL; + default: + /* Just in case... */ + *is_negative = (number < 0.0); + return TRIO_FP_NORMAL; + } + +#elif defined(TRIO_COMPILER_MSVC) + /* + * MSVC has an _fpclass() function. 
+ */ + switch (_fpclass(number)) { + case _FPCLASS_QNAN: + case _FPCLASS_SNAN: + *is_negative = TRIO_FALSE; + return TRIO_FP_NAN; + case _FPCLASS_PINF: + *is_negative = TRIO_FALSE; + return TRIO_FP_INFINITE; + case _FPCLASS_NINF: + *is_negative = TRIO_TRUE; + return TRIO_FP_INFINITE; + case _FPCLASS_PD: + *is_negative = TRIO_FALSE; + return TRIO_FP_SUBNORMAL; + case _FPCLASS_ND: + *is_negative = TRIO_TRUE; + return TRIO_FP_SUBNORMAL; + case _FPCLASS_PZ: + *is_negative = TRIO_FALSE; + return TRIO_FP_ZERO; + case _FPCLASS_NZ: + *is_negative = TRIO_TRUE; + return TRIO_FP_ZERO; + case _FPCLASS_PN: + *is_negative = TRIO_FALSE; + return TRIO_FP_NORMAL; + case _FPCLASS_NN: + *is_negative = TRIO_TRUE; + return TRIO_FP_NORMAL; + default: + /* Just in case... */ + *is_negative = (number < 0.0); + return TRIO_FP_NORMAL; + } + +#elif defined(FP_PLUS_NORM) || defined(__hpux) + + /* + * HP-UX 9.x and 10.x have an fpclassify() function, that is different + * from the C99 fpclassify() macro supported on HP-UX 11.x. + */ + switch (fpclassify(number)) { + case FP_QNAN: + case FP_SNAN: + *is_negative = TRIO_FALSE; + return TRIO_FP_NAN; + case FP_PLUS_INF: + *is_negative = TRIO_FALSE; + return TRIO_FP_INFINITE; + case FP_MINUS_INF: + *is_negative = TRIO_TRUE; + return TRIO_FP_INFINITE; + case FP_PLUS_DENORM: + *is_negative = TRIO_FALSE; + return TRIO_FP_SUBNORMAL; + case FP_MINUS_DENORM: + *is_negative = TRIO_TRUE; + return TRIO_FP_SUBNORMAL; + case FP_PLUS_ZERO: + *is_negative = TRIO_FALSE; + return TRIO_FP_ZERO; + case FP_MINUS_ZERO: + *is_negative = TRIO_TRUE; + return TRIO_FP_ZERO; + case FP_PLUS_NORM: + *is_negative = TRIO_FALSE; + return TRIO_FP_NORMAL; + case FP_MINUS_NORM: + *is_negative = TRIO_TRUE; + return TRIO_FP_NORMAL; + default: + assert(0); + } + +#else + /* + * Fallback solution. 
+ */ + int rc; + + if (number == 0.0) { + /* + * In IEEE 754 the sign of zero is ignored in comparisons, so we + * have to handle this as a special case by examining the sign bit + * directly. + */ +#if defined(USE_IEEE_754) + *is_negative = trio_is_negative(number); +#else + *is_negative = TRIO_FALSE; /* FIXME */ +#endif + return TRIO_FP_ZERO; + } + if (trio_isnan(number)) { + *is_negative = TRIO_FALSE; + return TRIO_FP_NAN; + } + if ((rc = trio_isinf(number))) { + *is_negative = (rc == -1); + return TRIO_FP_INFINITE; + } + if ((number > 0.0) && (number < DBL_MIN)) { + *is_negative = TRIO_FALSE; + return TRIO_FP_SUBNORMAL; + } + if ((number < 0.0) && (number > -DBL_MIN)) { + *is_negative = TRIO_TRUE; + return TRIO_FP_SUBNORMAL; + } + *is_negative = (number < 0.0); + return TRIO_FP_NORMAL; + +#endif +} + +/** + Examine the sign of a number. + + @param number An arbitrary floating-point number. + @return Boolean value indicating whether or not the number has the + sign bit set (i.e. is negative). +*/ +TRIO_PUBLIC int +trio_signbit +TRIO_ARGS1((number), + double number) +{ + int is_negative; + + (void)trio_fpclassify_and_signbit(number, &is_negative); + return is_negative; +} + +/** + Examine the class of a number. + + @param number An arbitrary floating-point number. + @return Enumerable value indicating the class of @p number +*/ +TRIO_PUBLIC int +trio_fpclassify +TRIO_ARGS1((number), + double number) +{ + int dummy; + + return trio_fpclassify_and_signbit(number, &dummy); +} + + +/** @} SpecialQuantities */ + +/************************************************************************* + * For test purposes. + * + * Add the following compiler option to include this test code. 
+ * + * Unix : -DSTANDALONE + * VMS : /DEFINE=(STANDALONE) + */ +#if defined(STANDALONE) +# include <stdio.h> + +static TRIO_CONST char * +getClassification +TRIO_ARGS1((type), + int type) +{ + switch (type) { + case TRIO_FP_INFINITE: + return "FP_INFINITE"; + case TRIO_FP_NAN: + return "FP_NAN"; + case TRIO_FP_NORMAL: + return "FP_NORMAL"; + case TRIO_FP_SUBNORMAL: + return "FP_SUBNORMAL"; + case TRIO_FP_ZERO: + return "FP_ZERO"; + default: + return "FP_UNKNOWN"; + } +} + +static void +print_class +TRIO_ARGS2((prefix, number), + TRIO_CONST char *prefix, + double number) +{ + printf("%-6s: %s %-15s %g\n", + prefix, + trio_signbit(number) ? "-" : "+", + getClassification(trio_fpclassify(number)), + number); +} + +int main(TRIO_NOARGS) +{ + double my_nan; + double my_pinf; + double my_ninf; +# if defined(TRIO_PLATFORM_UNIX) + void (*signal_handler) TRIO_PROTO((int)); +# endif + + my_nan = trio_nan(); + my_pinf = trio_pinf(); + my_ninf = trio_ninf(); + + print_class("Nan", my_nan); + print_class("PInf", my_pinf); + print_class("NInf", my_ninf); + print_class("PZero", 0.0); + print_class("NZero", -0.0); + print_class("PNorm", 1.0); + print_class("NNorm", -1.0); + print_class("PSub", 1.01e-307 - 1.00e-307); + print_class("NSub", 1.00e-307 - 1.01e-307); + + printf("NaN : %4g 0x%02x%02x%02x%02x%02x%02x%02x%02x (%2d, %2d)\n", + my_nan, + ((unsigned char *)&my_nan)[0], + ((unsigned char *)&my_nan)[1], + ((unsigned char *)&my_nan)[2], + ((unsigned char *)&my_nan)[3], + ((unsigned char *)&my_nan)[4], + ((unsigned char *)&my_nan)[5], + ((unsigned char *)&my_nan)[6], + ((unsigned char *)&my_nan)[7], + trio_isnan(my_nan), trio_isinf(my_nan)); + printf("PInf: %4g 0x%02x%02x%02x%02x%02x%02x%02x%02x (%2d, %2d)\n", + my_pinf, + ((unsigned char *)&my_pinf)[0], + ((unsigned char *)&my_pinf)[1], + ((unsigned char *)&my_pinf)[2], + ((unsigned char *)&my_pinf)[3], + ((unsigned char *)&my_pinf)[4], + ((unsigned char *)&my_pinf)[5], + ((unsigned char *)&my_pinf)[6], + ((unsigned char 
*)&my_pinf)[7], + trio_isnan(my_pinf), trio_isinf(my_pinf)); + printf("NInf: %4g 0x%02x%02x%02x%02x%02x%02x%02x%02x (%2d, %2d)\n", + my_ninf, + ((unsigned char *)&my_ninf)[0], + ((unsigned char *)&my_ninf)[1], + ((unsigned char *)&my_ninf)[2], + ((unsigned char *)&my_ninf)[3], + ((unsigned char *)&my_ninf)[4], + ((unsigned char *)&my_ninf)[5], + ((unsigned char *)&my_ninf)[6], + ((unsigned char *)&my_ninf)[7], + trio_isnan(my_ninf), trio_isinf(my_ninf)); + +# if defined(TRIO_PLATFORM_UNIX) + signal_handler = signal(SIGFPE, SIG_IGN); +# endif + + my_pinf = DBL_MAX + DBL_MAX; + my_ninf = -my_pinf; + my_nan = my_pinf / my_pinf; + +# if defined(TRIO_PLATFORM_UNIX) + signal(SIGFPE, signal_handler); +# endif + + printf("NaN : %4g 0x%02x%02x%02x%02x%02x%02x%02x%02x (%2d, %2d)\n", + my_nan, + ((unsigned char *)&my_nan)[0], + ((unsigned char *)&my_nan)[1], + ((unsigned char *)&my_nan)[2], + ((unsigned char *)&my_nan)[3], + ((unsigned char *)&my_nan)[4], + ((unsigned char *)&my_nan)[5], + ((unsigned char *)&my_nan)[6], + ((unsigned char *)&my_nan)[7], + trio_isnan(my_nan), trio_isinf(my_nan)); + printf("PInf: %4g 0x%02x%02x%02x%02x%02x%02x%02x%02x (%2d, %2d)\n", + my_pinf, + ((unsigned char *)&my_pinf)[0], + ((unsigned char *)&my_pinf)[1], + ((unsigned char *)&my_pinf)[2], + ((unsigned char *)&my_pinf)[3], + ((unsigned char *)&my_pinf)[4], + ((unsigned char *)&my_pinf)[5], + ((unsigned char *)&my_pinf)[6], + ((unsigned char *)&my_pinf)[7], + trio_isnan(my_pinf), trio_isinf(my_pinf)); + printf("NInf: %4g 0x%02x%02x%02x%02x%02x%02x%02x%02x (%2d, %2d)\n", + my_ninf, + ((unsigned char *)&my_ninf)[0], + ((unsigned char *)&my_ninf)[1], + ((unsigned char *)&my_ninf)[2], + ((unsigned char *)&my_ninf)[3], + ((unsigned char *)&my_ninf)[4], + ((unsigned char *)&my_ninf)[5], + ((unsigned char *)&my_ninf)[6], + ((unsigned char *)&my_ninf)[7], + trio_isnan(my_ninf), trio_isinf(my_ninf)); + + return 0; +} +#endif diff --git a/bundle/libxml/trionan.h b/bundle/libxml/trionan.h new file mode 
100644 index 0000000000..bed0c120f9 --- /dev/null +++ b/bundle/libxml/trionan.h @@ -0,0 +1,81 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 2001 Bjorn Reese <breese@users.sourceforge.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + ************************************************************************/ + +#ifndef TRIO_NAN_H +#define TRIO_NAN_H + +#include "triodef.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + TRIO_FP_INFINITE, + TRIO_FP_NAN, + TRIO_FP_NORMAL, + TRIO_FP_SUBNORMAL, + TRIO_FP_ZERO +}; + +/* + * Return NaN (Not-a-Number). + */ +TRIO_PUBLIC double trio_nan TRIO_PROTO((void)); + +/* + * Return positive infinity. + */ +TRIO_PUBLIC double trio_pinf TRIO_PROTO((void)); + +/* + * Return negative infinity. + */ +TRIO_PUBLIC double trio_ninf TRIO_PROTO((void)); + +/* + * Return negative zero. + */ +TRIO_PUBLIC double trio_nzero TRIO_PROTO((TRIO_NOARGS)); + +/* + * If number is a NaN return non-zero, otherwise return zero. + */ +TRIO_PUBLIC int trio_isnan TRIO_PROTO((double number)); + +/* + * If number is positive infinity return 1, if number is negative + * infinity return -1, otherwise return 0. + */ +TRIO_PUBLIC int trio_isinf TRIO_PROTO((double number)); + +/* + * If number is finite return non-zero, otherwise return zero. 
+ */ +TRIO_PUBLIC int trio_isfinite TRIO_PROTO((double number)); + +TRIO_PUBLIC int trio_signbit TRIO_PROTO((double number)); + +TRIO_PUBLIC int trio_fpclassify TRIO_PROTO((double number)); + +TRIO_PUBLIC int trio_fpclassify_and_signbit TRIO_PROTO((double number, int *is_negative)); + +#ifdef __cplusplus +} +#endif + +#endif /* TRIO_NAN_H */ diff --git a/bundle/libxml/triop.h b/bundle/libxml/triop.h new file mode 100644 index 0000000000..8462c56f87 --- /dev/null +++ b/bundle/libxml/triop.h @@ -0,0 +1,150 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 2000 Bjorn Reese and Daniel Stenberg. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + ************************************************************************ + * + * Private functions, types, etc. used for callback functions. + * + * The ref pointer is an opaque type and should remain as such. + * Private data must only be accessible through the getter and + * setter functions. 
+ * + ************************************************************************/ + +#ifndef TRIO_TRIOP_H +#define TRIO_TRIOP_H + +#include "triodef.h" + +#include <stdlib.h> +#if defined(TRIO_COMPILER_ANCIENT) +# include <varargs.h> +#else +# include <stdarg.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef TRIO_C99 +# define TRIO_C99 1 +#endif +#ifndef TRIO_BSD +# define TRIO_BSD 1 +#endif +#ifndef TRIO_GNU +# define TRIO_GNU 1 +#endif +#ifndef TRIO_MISC +# define TRIO_MISC 1 +#endif +#ifndef TRIO_UNIX98 +# define TRIO_UNIX98 1 +#endif +#ifndef TRIO_MICROSOFT +# define TRIO_MICROSOFT 1 +#endif +#ifndef TRIO_EXTENSION +# define TRIO_EXTENSION 1 +#endif +#ifndef TRIO_WIDECHAR /* Does not work yet. Do not enable */ +# define TRIO_WIDECHAR 0 +#endif +#ifndef TRIO_ERRORS +# define TRIO_ERRORS 1 +#endif + +#ifndef TRIO_MALLOC +# define TRIO_MALLOC(n) malloc(n) +#endif +#ifndef TRIO_REALLOC +# define TRIO_REALLOC(x,n) realloc((x),(n)) +#endif +#ifndef TRIO_FREE +# define TRIO_FREE(x) free(x) +#endif + + +/************************************************************************* + * User-defined specifiers + */ + +typedef int (*trio_callback_t) TRIO_PROTO((trio_pointer_t)); + +trio_pointer_t trio_register TRIO_PROTO((trio_callback_t callback, const char *name)); +void trio_unregister TRIO_PROTO((trio_pointer_t handle)); + +TRIO_CONST char *trio_get_format TRIO_PROTO((trio_pointer_t ref)); +trio_pointer_t trio_get_argument TRIO_PROTO((trio_pointer_t ref)); + +/* Modifiers */ +int trio_get_width TRIO_PROTO((trio_pointer_t ref)); +void trio_set_width TRIO_PROTO((trio_pointer_t ref, int width)); +int trio_get_precision TRIO_PROTO((trio_pointer_t ref)); +void trio_set_precision TRIO_PROTO((trio_pointer_t ref, int precision)); +int trio_get_base TRIO_PROTO((trio_pointer_t ref)); +void trio_set_base TRIO_PROTO((trio_pointer_t ref, int base)); +int trio_get_padding TRIO_PROTO((trio_pointer_t ref)); +void trio_set_padding TRIO_PROTO((trio_pointer_t ref, int 
is_padding)); +int trio_get_short TRIO_PROTO((trio_pointer_t ref)); /* h */ +void trio_set_shortshort TRIO_PROTO((trio_pointer_t ref, int is_shortshort)); +int trio_get_shortshort TRIO_PROTO((trio_pointer_t ref)); /* hh */ +void trio_set_short TRIO_PROTO((trio_pointer_t ref, int is_short)); +int trio_get_long TRIO_PROTO((trio_pointer_t ref)); /* l */ +void trio_set_long TRIO_PROTO((trio_pointer_t ref, int is_long)); +int trio_get_longlong TRIO_PROTO((trio_pointer_t ref)); /* ll */ +void trio_set_longlong TRIO_PROTO((trio_pointer_t ref, int is_longlong)); +int trio_get_longdouble TRIO_PROTO((trio_pointer_t ref)); /* L */ +void trio_set_longdouble TRIO_PROTO((trio_pointer_t ref, int is_longdouble)); +int trio_get_alternative TRIO_PROTO((trio_pointer_t ref)); /* # */ +void trio_set_alternative TRIO_PROTO((trio_pointer_t ref, int is_alternative)); +int trio_get_alignment TRIO_PROTO((trio_pointer_t ref)); /* - */ +void trio_set_alignment TRIO_PROTO((trio_pointer_t ref, int is_leftaligned)); +int trio_get_spacing TRIO_PROTO((trio_pointer_t ref)); /* (space) */ +void trio_set_spacing TRIO_PROTO((trio_pointer_t ref, int is_space)); +int trio_get_sign TRIO_PROTO((trio_pointer_t ref)); /* + */ +void trio_set_sign TRIO_PROTO((trio_pointer_t ref, int is_showsign)); +int trio_get_quote TRIO_PROTO((trio_pointer_t ref)); /* ' */ +void trio_set_quote TRIO_PROTO((trio_pointer_t ref, int is_quote)); +int trio_get_upper TRIO_PROTO((trio_pointer_t ref)); +void trio_set_upper TRIO_PROTO((trio_pointer_t ref, int is_upper)); +#if TRIO_C99 +int trio_get_largest TRIO_PROTO((trio_pointer_t ref)); /* j */ +void trio_set_largest TRIO_PROTO((trio_pointer_t ref, int is_largest)); +int trio_get_ptrdiff TRIO_PROTO((trio_pointer_t ref)); /* t */ +void trio_set_ptrdiff TRIO_PROTO((trio_pointer_t ref, int is_ptrdiff)); +int trio_get_size TRIO_PROTO((trio_pointer_t ref)); /* z / Z */ +void trio_set_size TRIO_PROTO((trio_pointer_t ref, int is_size)); +#endif + +/* Printing */ +int
trio_print_ref TRIO_PROTO((trio_pointer_t ref, const char *format, ...)); +int trio_vprint_ref TRIO_PROTO((trio_pointer_t ref, const char *format, va_list args)); +int trio_printv_ref TRIO_PROTO((trio_pointer_t ref, const char *format, trio_pointer_t *args)); + +void trio_print_int TRIO_PROTO((trio_pointer_t ref, int number)); +void trio_print_uint TRIO_PROTO((trio_pointer_t ref, unsigned int number)); +/* void trio_print_long TRIO_PROTO((trio_pointer_t ref, long number)); */ +/* void trio_print_ulong TRIO_PROTO((trio_pointer_t ref, unsigned long number)); */ +void trio_print_double TRIO_PROTO((trio_pointer_t ref, double number)); +void trio_print_string TRIO_PROTO((trio_pointer_t ref, char *string)); +void trio_print_pointer TRIO_PROTO((trio_pointer_t ref, trio_pointer_t pointer)); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* TRIO_TRIOP_H */ diff --git a/bundle/libxml/triostr.c b/bundle/libxml/triostr.c new file mode 100644 index 0000000000..43a30dad9f --- /dev/null +++ b/bundle/libxml/triostr.c @@ -0,0 +1,2024 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 2001 Bjorn Reese and Daniel Stenberg. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + ************************************************************************/ + +/************************************************************************* + * Include files + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <math.h> +#include "triodef.h" +#include "triostr.h" + +/************************************************************************* + * Definitions + */ + +#if !defined(TRIO_STRING_PUBLIC) +# define TRIO_STRING_PUBLIC TRIO_PUBLIC +#endif +#if !defined(TRIO_STRING_PRIVATE) +# define TRIO_STRING_PRIVATE TRIO_PRIVATE +#endif + +#if !defined(NULL) +# define NULL 0 +#endif +#if !defined(NIL) +# define NIL ((char)0) +#endif +#if !defined(FALSE) +# define FALSE (1 == 0) +# define TRUE (! FALSE) +#endif +#if !defined(BOOLEAN_T) +# define BOOLEAN_T int +#endif + +#if defined(TRIO_COMPILER_SUPPORTS_C99) +# define USE_STRTOD +# define USE_STRTOF +#elif defined(TRIO_COMPILER_MSVC) +# define USE_STRTOD +#endif + +#if defined(TRIO_PLATFORM_UNIX) +# define USE_STRCASECMP +# define USE_STRNCASECMP +# define USE_STRERROR +# if defined(TRIO_PLATFORM_QNX) +# define strcasecmp(x,y) stricmp(x,y) +# define strncasecmp(x,y,n) strnicmp(x,y,n) +# endif +#elif defined(TRIO_PLATFORM_WIN32) +# define USE_STRCASECMP +# define strcasecmp(x,y) strcmpi(x,y) +#endif + +/************************************************************************* + * Structures + */ + +struct _trio_string_t +{ + char *content; + size_t length; + size_t allocated; +}; + +/************************************************************************* + * Constants + */ + +#if !defined(TRIO_MINIMAL) +static TRIO_CONST char rcsid[] = "@(#)$Id$"; +#endif + +/************************************************************************* + * Static String Functions + */ + +#if defined(TRIO_DOCUMENTATION) +# include "doc/doc_static.h" +#endif +/** @addtogroup StaticStrings + @{ +*/ + +/** + Create new string. + + @param size Size of new string. 
+ @return Pointer to string, or NULL if allocation failed. +*/ +TRIO_STRING_PUBLIC char * +trio_create +TRIO_ARGS1((size), + size_t size) +{ + return (char *)TRIO_MALLOC(size); +} + + +/** + Destroy string. + + @param string String to be freed. +*/ +TRIO_STRING_PUBLIC void +trio_destroy +TRIO_ARGS1((string), + char *string) +{ + if (string) + { + TRIO_FREE(string); + } +} + + +/** + Count the number of characters in a string. + + @param string String to measure. + @return Number of characters in @string. +*/ +TRIO_STRING_PUBLIC size_t +trio_length +TRIO_ARGS1((string), + TRIO_CONST char *string) +{ + return strlen(string); +} + + +#if !defined(TRIO_MINIMAL) +/** + Append @p source at the end of @p target. + + @param target Target string. + @param source Source string. + @return Boolean value indicating success or failure. + + @pre @p target must point to a memory chunk with sufficient room to + contain the @p target string and @p source string. + @pre No boundary checking is performed, so insufficient memory will + result in a buffer overrun. + @post @p target will be zero terminated. +*/ +TRIO_STRING_PUBLIC int +trio_append +TRIO_ARGS2((target, source), + char *target, + TRIO_CONST char *source) +{ + assert(target); + assert(source); + + return (strcat(target, source) != NULL); +} +#endif /* !defined(TRIO_MINIMAL) */ + +#if !defined(TRIO_MINIMAL) +/** + Append at most @p max characters from @p source to @p target. + + @param target Target string. + @param max Maximum number of characters to append. + @param source Source string. + @return Boolean value indicating success or failure. + + @pre @p target must point to a memory chuck with sufficient room to + contain the @p target string and the @p source string (at most @p max + characters). + @pre No boundary checking is performed, so insufficient memory will + result in a buffer overrun. + @post @p target will be zero terminated. 
+*/ +TRIO_STRING_PUBLIC int +trio_append_max +TRIO_ARGS3((target, max, source), + char *target, + size_t max, + TRIO_CONST char *source) +{ + size_t length; + + assert(target); + assert(source); + + length = trio_length(target); + + if (max > length) + { + strncat(target, source, max - length - 1); + } + return TRUE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Determine if a string contains a substring. + + @param string String to be searched. + @param substring String to be found. + @return Boolean value indicating success or failure. +*/ +TRIO_STRING_PUBLIC int +trio_contains +TRIO_ARGS2((string, substring), + TRIO_CONST char *string, + TRIO_CONST char *substring) +{ + assert(string); + assert(substring); + + return (0 != strstr(string, substring)); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Copy @p source to @p target. + + @param target Target string. + @param source Source string. + @return Boolean value indicating success or failure. + + @pre @p target must point to a memory chunk with sufficient room to + contain the @p source string. + @pre No boundary checking is performed, so insufficient memory will + result in a buffer overrun. + @post @p target will be zero terminated. +*/ +TRIO_STRING_PUBLIC int +trio_copy +TRIO_ARGS2((target, source), + char *target, + TRIO_CONST char *source) +{ + assert(target); + assert(source); + + (void)strcpy(target, source); + return TRUE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Copy at most @p max characters from @p source to @p target. + + @param target Target string. + @param max Maximum number of characters to append. + @param source Source string. + @return Boolean value indicating success or failure. + + @pre @p target must point to a memory chunk with sufficient room to + contain the @p source string (at most @p max characters). + @pre No boundary checking is performed, so insufficient memory will + result in a buffer overrun. 
+ @post @p target will be zero terminated. +*/ +TRIO_STRING_PUBLIC int +trio_copy_max +TRIO_ARGS3((target, max, source), + char *target, + size_t max, + TRIO_CONST char *source) +{ + assert(target); + assert(source); + assert(max > 0); /* Includes != 0 */ + + (void)strncpy(target, source, max - 1); + target[max - 1] = (char)0; + return TRUE; +} + + +/* + * TrioDuplicateMax + */ +TRIO_STRING_PRIVATE char * +TrioDuplicateMax +TRIO_ARGS2((source, size), + TRIO_CONST char *source, + size_t size) +{ + char *target; + + assert(source); + + /* Make room for string plus a terminating zero */ + size++; + target = trio_create(size); + if (target) + { + trio_copy_max(target, size, source); + } + return target; +} + + +/** + Duplicate @p source. + + @param source Source string. + @return A copy of the @p source string. + + @post @p target will be zero terminated. +*/ +TRIO_STRING_PUBLIC char * +trio_duplicate +TRIO_ARGS1((source), + TRIO_CONST char *source) +{ + return TrioDuplicateMax(source, trio_length(source)); +} + + +#if !defined(TRIO_MINIMAL) +/** + Duplicate at most @p max characters of @p source. + + @param source Source string. + @param max Maximum number of characters to duplicate. + @return A copy of the @p source string. + + @post @p target will be zero terminated. +*/ +TRIO_STRING_PUBLIC char * +trio_duplicate_max TRIO_ARGS2((source, max), + TRIO_CONST char *source, + size_t max) +{ + size_t length; + + assert(source); + assert(max > 0); + + length = trio_length(source); + if (length > max) + { + length = max; + } + return TrioDuplicateMax(source, length); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Compare if two strings are equal. + + @param first First string. + @param second Second string. + @return Boolean indicating whether the two strings are equal or not. + + Case-insensitive comparison. 
+*/ +TRIO_STRING_PUBLIC int +trio_equal +TRIO_ARGS2((first, second), + TRIO_CONST char *first, + TRIO_CONST char *second) +{ + assert(first); + assert(second); + + if ((first != NULL) && (second != NULL)) + { +#if defined(USE_STRCASECMP) + return (0 == strcasecmp(first, second)); +#else + while ((*first != NIL) && (*second != NIL)) + { + if (toupper(*first) != toupper(*second)) + { + break; + } + first++; + second++; + } + return ((*first == NIL) && (*second == NIL)); +#endif + } + return FALSE; +} + + +/** + Compare if two strings are equal. + + @param first First string. + @param second Second string. + @return Boolean indicating whether the two strings are equal or not. + + Case-sensitive comparison. +*/ +TRIO_STRING_PUBLIC int +trio_equal_case +TRIO_ARGS2((first, second), + TRIO_CONST char *first, + TRIO_CONST char *second) +{ + assert(first); + assert(second); + + if ((first != NULL) && (second != NULL)) + { + return (0 == strcmp(first, second)); + } + return FALSE; +} + + +#if !defined(TRIO_MINIMAL) +/** + Compare if two strings up until the first @p max characters are equal. + + @param first First string. + @param max Maximum number of characters to compare. + @param second Second string. + @return Boolean indicating whether the two strings are equal or not. + + Case-sensitive comparison. +*/ +TRIO_STRING_PUBLIC int +trio_equal_case_max +TRIO_ARGS3((first, max, second), + TRIO_CONST char *first, + size_t max, + TRIO_CONST char *second) +{ + assert(first); + assert(second); + + if ((first != NULL) && (second != NULL)) + { + return (0 == strncmp(first, second, max)); + } + return FALSE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Compare if two strings are equal. + + @param first First string. + @param second Second string. + @return Boolean indicating whether the two strings are equal or not. + + Collating characters are considered equal. 
+*/ +TRIO_STRING_PUBLIC int +trio_equal_locale +TRIO_ARGS2((first, second), + TRIO_CONST char *first, + TRIO_CONST char *second) +{ + assert(first); + assert(second); + +#if defined(LC_COLLATE) + return (strcoll(first, second) == 0); +#else + return trio_equal(first, second); +#endif +} + + +/** + Compare if two strings up until the first @p max characters are equal. + + @param first First string. + @param max Maximum number of characters to compare. + @param second Second string. + @return Boolean indicating whether the two strings are equal or not. + + Case-insensitive comparison. +*/ +TRIO_STRING_PUBLIC int +trio_equal_max +TRIO_ARGS3((first, max, second), + TRIO_CONST char *first, + size_t max, + TRIO_CONST char *second) +{ + assert(first); + assert(second); + + if ((first != NULL) && (second != NULL)) + { +#if defined(USE_STRNCASECMP) + return (0 == strncasecmp(first, second, max)); +#else + /* Not adequately tested yet */ + size_t cnt = 0; + while ((*first != NIL) && (*second != NIL) && (cnt <= max)) + { + if (toupper(*first) != toupper(*second)) + { + break; + } + first++; + second++; + cnt++; + } + return ((cnt == max) || ((*first == NIL) && (*second == NIL))); +#endif + } + return FALSE; +} + + +/** + Provide a textual description of an error code (errno). + + @param error_number Error number. + @return Textual description of @p error_number. +*/ +TRIO_STRING_PUBLIC TRIO_CONST char * +trio_error +TRIO_ARGS1((error_number), + int error_number) +{ +#if defined(USE_STRERROR) + return strerror(error_number); +#else + return "unknown"; +#endif +} + + +#if !defined(TRIO_MINIMAL) +/** + Format the date/time according to @p format. + + @param target Target string. + @param max Maximum number of characters to format. + @param format Formatting string. + @param datetime Date/time structure. + @return Number of formatted characters. + + The formatting string accepts the same specifiers as the standard C + function strftime. 
+*/ +TRIO_STRING_PUBLIC size_t +trio_format_date_max +TRIO_ARGS4((target, max, format, datetime), + char *target, + size_t max, + TRIO_CONST char *format, + TRIO_CONST struct tm *datetime) +{ + assert(target); + assert(format); + assert(datetime); + assert(max > 0); + + return strftime(target, max, format, datetime); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Calculate a hash value for a string. + + @param string String to be calculated on. + @param type Hash function. + @return Calculated hash value. + + @p type can be one of the following + @li @c TRIO_HASH_PLAIN Plain hash function. +*/ +TRIO_STRING_PUBLIC unsigned long +trio_hash +TRIO_ARGS2((string, type), + TRIO_CONST char *string, + int type) +{ + unsigned long value = 0L; + char ch; + + assert(string); + + switch (type) + { + case TRIO_HASH_PLAIN: + while ( (ch = *string++) != NIL ) + { + value *= 31; + value += (unsigned long)ch; + } + break; + default: + assert(FALSE); + break; + } + return value; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Find first occurrence of a character in a string. + + @param string String to be searched. + @param character Character to be found. + @param A pointer to the found character, or NULL if character was not found. + */ +TRIO_STRING_PUBLIC char * +trio_index +TRIO_ARGS2((string, character), + TRIO_CONST char *string, + int character) +{ + assert(string); + + return strchr(string, character); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Find last occurrence of a character in a string. + + @param string String to be searched. + @param character Character to be found. + @param A pointer to the found character, or NULL if character was not found. 
+ */ +TRIO_STRING_PUBLIC char * +trio_index_last +TRIO_ARGS2((string, character), + TRIO_CONST char *string, + int character) +{ + assert(string); + + return strchr(string, character); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Convert the alphabetic letters in the string to lower-case. + + @param target String to be converted. + @return Number of processed characters (converted or not). +*/ +TRIO_STRING_PUBLIC int +trio_lower +TRIO_ARGS1((target), + char *target) +{ + assert(target); + + return trio_span_function(target, target, tolower); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Compare two strings using wildcards. + + @param string String to be searched. + @param pattern Pattern, including wildcards, to search for. + @return Boolean value indicating success or failure. + + Case-insensitive comparison. + + The following wildcards can be used + @li @c * Match any number of characters. + @li @c ? Match a single character. +*/ +TRIO_STRING_PUBLIC int +trio_match +TRIO_ARGS2((string, pattern), + TRIO_CONST char *string, + TRIO_CONST char *pattern) +{ + assert(string); + assert(pattern); + + for (; ('*' != *pattern); ++pattern, ++string) + { + if (NIL == *string) + { + return (NIL == *pattern); + } + if ((toupper((int)*string) != toupper((int)*pattern)) + && ('?' != *pattern)) + { + return FALSE; + } + } + /* two-line patch to prevent *too* much recursiveness: */ + while ('*' == pattern[1]) + pattern++; + + do + { + if ( trio_match(string, &pattern[1]) ) + { + return TRUE; + } + } + while (*string++); + + return FALSE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Compare two strings using wildcards. + + @param string String to be searched. + @param pattern Pattern, including wildcards, to search for. + @return Boolean value indicating success or failure. + + Case-sensitive comparison. 
+ + The following wildcards can be used + @li @c * Match any number of characters. + @li @c ? Match a single character. +*/ +TRIO_STRING_PUBLIC int +trio_match_case +TRIO_ARGS2((string, pattern), + TRIO_CONST char *string, + TRIO_CONST char *pattern) +{ + assert(string); + assert(pattern); + + for (; ('*' != *pattern); ++pattern, ++string) + { + if (NIL == *string) + { + return (NIL == *pattern); + } + if ((*string != *pattern) + && ('?' != *pattern)) + { + return FALSE; + } + } + /* two-line patch to prevent *too* much recursiveness: */ + while ('*' == pattern[1]) + pattern++; + + do + { + if ( trio_match_case(string, &pattern[1]) ) + { + return TRUE; + } + } + while (*string++); + + return FALSE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Execute a function on each character in string. + + @param target Target string. + @param source Source string. + @param Function Function to be executed. + @return Number of processed characters. +*/ +TRIO_STRING_PUBLIC size_t +trio_span_function +TRIO_ARGS3((target, source, Function), + char *target, + TRIO_CONST char *source, + int (*Function) TRIO_PROTO((int))) +{ + size_t count = 0; + + assert(target); + assert(source); + assert(Function); + + while (*source != NIL) + { + *target++ = Function(*source++); + count++; + } + return count; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Search for a substring in a string. + + @param string String to be searched. + @param substring String to be found. + @return Pointer to first occurrence of @p substring in @p string, or NULL + if no match was found. +*/ +TRIO_STRING_PUBLIC char * +trio_substring +TRIO_ARGS2((string, substring), + TRIO_CONST char *string, + TRIO_CONST char *substring) +{ + assert(string); + assert(substring); + + return strstr(string, substring); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Search for a substring in the first @p max characters of a string. 
+ + @param string String to be searched. + @param max Maximum characters to be searched. + @param substring String to be found. + @return Pointer to first occurrence of @p substring in @p string, or NULL + if no match was found. +*/ +TRIO_STRING_PUBLIC char * +trio_substring_max +TRIO_ARGS3((string, max, substring), + TRIO_CONST char *string, + size_t max, + TRIO_CONST char *substring) +{ + size_t count; + size_t size; + char *result = NULL; + + assert(string); + assert(substring); + + size = trio_length(substring); + if (size <= max) + { + for (count = 0; count <= max - size; count++) + { + if (trio_equal_max(substring, size, &string[count])) + { + result = (char *)&string[count]; + break; + } + } + } + return result; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Tokenize string. + + @param string String to be tokenized. + @param tokens String containing list of delimiting characters. + @return Start of new token. + + @warning @p string will be destroyed. +*/ +TRIO_STRING_PUBLIC char * +trio_tokenize +TRIO_ARGS2((string, delimiters), + char *string, + TRIO_CONST char *delimiters) +{ + assert(delimiters); + + return strtok(string, delimiters); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Convert string to floating-point number. + + @param source String to be converted. + @param endp Pointer to end of the converted string. + @return A floating-point number. + + The following Extended Backus-Naur form is used + @verbatim + double ::= [ <sign> ] + ( <number> | + <number> <decimal_point> <number> | + <decimal_point> <number> ) + [ <exponential> [ <sign> ] <number> ] + number ::= 1*( <digit> ) + digit ::= ( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ) + exponential ::= ( 'e' | 'E' ) + sign ::= ( '-' | '+' ) + decimal_point ::= '.' 
+ @endverbatim +*/ +/* FIXME: Add EBNF for hex-floats */ +TRIO_STRING_PUBLIC trio_long_double_t +trio_to_long_double +TRIO_ARGS2((source, endp), + TRIO_CONST char *source, + char **endp) +{ +#if defined(USE_STRTOLD) + return strtold(source, endp); +#else + int isNegative = FALSE; + int isExponentNegative = FALSE; + trio_long_double_t integer = 0.0; + trio_long_double_t fraction = 0.0; + unsigned long exponent = 0; + trio_long_double_t base; + trio_long_double_t fracdiv = 1.0; + trio_long_double_t value = 0.0; + + /* First try hex-floats */ + if ((source[0] == '0') && ((source[1] == 'x') || (source[1] == 'X'))) + { + base = 16.0; + source += 2; + while (isxdigit((int)*source)) + { + integer *= base; + integer += (isdigit((int)*source) + ? (*source - '0') + : 10 + (toupper((int)*source) - 'A')); + source++; + } + if (*source == '.') + { + source++; + while (isxdigit((int)*source)) + { + fracdiv /= base; + fraction += fracdiv * (isdigit((int)*source) + ? (*source - '0') + : 10 + (toupper((int)*source) - 'A')); + source++; + } + if ((*source == 'p') || (*source == 'P')) + { + source++; + if ((*source == '+') || (*source == '-')) + { + isExponentNegative = (*source == '-'); + source++; + } + while (isdigit((int)*source)) + { + exponent *= (int)base; + exponent += (*source - '0'); + source++; + } + } + } + } + else /* Then try normal decimal floats */ + { + base = 10.0; + isNegative = (*source == '-'); + /* Skip sign */ + if ((*source == '+') || (*source == '-')) + source++; + + /* Integer part */ + while (isdigit((int)*source)) + { + integer *= base; + integer += (*source - '0'); + source++; + } + + if (*source == '.') + { + source++; /* skip decimal point */ + while (isdigit((int)*source)) + { + fracdiv /= base; + fraction += (*source - '0') * fracdiv; + source++; + } + } + if ((*source == 'e') + || (*source == 'E') +#if TRIO_MICROSOFT + || (*source == 'd') + || (*source == 'D') +#endif + ) + { + source++; /* Skip exponential indicator */ + isExponentNegative = 
(*source == '-'); + if ((*source == '+') || (*source == '-')) + source++; + while (isdigit((int)*source)) + { + exponent *= (int)base; + exponent += (*source - '0'); + source++; + } + } + } + + value = integer + fraction; + if (exponent != 0) + { + if (isExponentNegative) + value /= pow(base, (double)exponent); + else + value *= pow(base, (double)exponent); + } + if (isNegative) + value = -value; + + if (endp) + *endp = (char *)source; + return value; +#endif +} + + +/** + Convert string to floating-point number. + + @param source String to be converted. + @param endp Pointer to end of the converted string. + @return A floating-point number. + + See @ref trio_to_long_double. +*/ +TRIO_STRING_PUBLIC double +trio_to_double +TRIO_ARGS2((source, endp), + TRIO_CONST char *source, + char **endp) +{ +#if defined(USE_STRTOD) + return strtod(source, endp); +#else + return (double)trio_to_long_double(source, endp); +#endif +} + +#if !defined(TRIO_MINIMAL) +/** + Convert string to floating-point number. + + @param source String to be converted. + @param endp Pointer to end of the converted string. + @return A floating-point number. + + See @ref trio_to_long_double. +*/ +TRIO_STRING_PUBLIC float +trio_to_float +TRIO_ARGS2((source, endp), + TRIO_CONST char *source, + char **endp) +{ +#if defined(USE_STRTOF) + return strtof(source, endp); +#else + return (float)trio_to_long_double(source, endp); +#endif +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Convert string to signed integer. + + @param string String to be converted. + @param endp Pointer to end of converted string. + @param base Radix number of number. +*/ +TRIO_STRING_PUBLIC long +trio_to_long +TRIO_ARGS3((string, endp, base), + TRIO_CONST char *string, + char **endp, + int base) +{ + assert(string); + assert((base >= 2) && (base <= 36)); + + return strtol(string, endp, base); +} + + +#if !defined(TRIO_MINIMAL) +/** + Convert string to unsigned integer. + + @param string String to be converted. 
+ @param endp Pointer to end of converted string. + @param base Radix number of number. +*/ +TRIO_STRING_PUBLIC unsigned long +trio_to_unsigned_long +TRIO_ARGS3((string, endp, base), + TRIO_CONST char *string, + char **endp, + int base) +{ + assert(string); + assert((base >= 2) && (base <= 36)); + + return strtoul(string, endp, base); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/** + Convert the alphabetic letters in the string to upper-case. + + @param target The string to be converted. + @return The number of processed characters (converted or not). +*/ +TRIO_STRING_PUBLIC int +trio_upper +TRIO_ARGS1((target), + char *target) +{ + assert(target); + + return trio_span_function(target, target, toupper); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** @} End of StaticStrings */ + + +/************************************************************************* + * Dynamic String Functions + */ + +#if defined(TRIO_DOCUMENTATION) +# include "doc/doc_dynamic.h" +#endif +/** @addtogroup DynamicStrings + @{ +*/ + +/* + * TrioStringAlloc + */ +TRIO_STRING_PRIVATE trio_string_t * +TrioStringAlloc(TRIO_NOARGS) +{ + trio_string_t *self; + + self = (trio_string_t *)TRIO_MALLOC(sizeof(trio_string_t)); + if (self) + { + self->content = NULL; + self->length = 0; + self->allocated = 0; + } + return self; +} + + +/* + * TrioStringGrow + * + * The size of the string will be increased by 'delta' characters. If + * 'delta' is zero, the size will be doubled. + */ +TRIO_STRING_PRIVATE BOOLEAN_T +TrioStringGrow +TRIO_ARGS2((self, delta), + trio_string_t *self, + size_t delta) +{ + BOOLEAN_T status = FALSE; + char *new_content; + size_t new_size; + + new_size = (delta == 0) + ? ( (self->allocated == 0) ? 
1 : self->allocated * 2 ) + : self->allocated + delta; + + new_content = (char *)TRIO_REALLOC(self->content, new_size); + if (new_content) + { + self->content = new_content; + self->allocated = new_size; + status = TRUE; + } + return status; +} + + +/* + * TrioStringGrowTo + * + * The size of the string will be increased to 'length' plus one characters. + * If 'length' is less than the original size, the original size will be + * used (that is, the size of the string is never decreased). + */ +TRIO_STRING_PRIVATE BOOLEAN_T +TrioStringGrowTo +TRIO_ARGS2((self, length), + trio_string_t *self, + size_t length) +{ + length++; /* Room for terminating zero */ + return (self->allocated < length) + ? TrioStringGrow(self, length - self->allocated) + : TRUE; +} + + +#if !defined(TRIO_MINIMAL) +/** + Create a new dynamic string. + + @param initial_size Initial size of the buffer. + @return Newly allocated dynamic string, or NULL if memory allocation failed. +*/ +TRIO_STRING_PUBLIC trio_string_t * +trio_string_create +TRIO_ARGS1((initial_size), + int initial_size) +{ + trio_string_t *self; + + self = TrioStringAlloc(); + if (self) + { + if (TrioStringGrow(self, + (size_t)((initial_size > 0) ? initial_size : 1))) + { + self->content[0] = (char)0; + self->allocated = initial_size; + } + else + { + trio_string_destroy(self); + self = NULL; + } + } + return self; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Deallocate the dynamic string and its contents. + + @param self Dynamic string +*/ +TRIO_STRING_PUBLIC void +trio_string_destroy +TRIO_ARGS1((self), + trio_string_t *self) +{ + assert(self); + + if (self) + { + trio_destroy(self->content); + TRIO_FREE(self); + } +} + + +#if !defined(TRIO_MINIMAL) +/** + Get a pointer to the content. + + @param self Dynamic string. + @param offset Offset into content. + @return Pointer to the content. + + @p Offset can be zero, positive, or negative. If @p offset is zero, + then the start of the content will be returned. 
If @p offset is positive, + then a pointer to @p offset number of characters from the beginning of the + content is returned. If @p offset is negative, then a pointer to @p offset + number of characters from the ending of the string, starting at the + terminating zero, is returned. +*/ +TRIO_STRING_PUBLIC char * +trio_string_get +TRIO_ARGS2((self, offset), + trio_string_t *self, + int offset) +{ + char *result = NULL; + + assert(self); + + if (self->content != NULL) + { + if (self->length == 0) + { + (void)trio_string_length(self); + } + if (offset >= 0) + { + if (offset > (int)self->length) + { + offset = self->length; + } + } + else + { + offset += self->length + 1; + if (offset < 0) + { + offset = 0; + } + } + result = &(self->content[offset]); + } + return result; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/** + Extract the content. + + @param self Dynamic String + @return Content of dynamic string. + + The content is removed from the dynamic string. This enables destruction + of the dynamic string without deallocation of the content. +*/ +TRIO_STRING_PUBLIC char * +trio_string_extract +TRIO_ARGS1((self), + trio_string_t *self) +{ + char *result; + + assert(self); + + result = self->content; + /* FIXME: Allocate new empty buffer? */ + self->content = NULL; + self->length = self->allocated = 0; + return result; +} + + +#if !defined(TRIO_MINIMAL) +/** + Set the content of the dynamic string. + + @param self Dynamic String + @param buffer The new content. + + Sets the content of the dynamic string to a copy @p buffer. + An existing content will be deallocated first, if necessary. + + @remark + This function will make a copy of @p buffer. + You are responsible for deallocating @p buffer yourself. 
+*/ +TRIO_STRING_PUBLIC void +trio_xstring_set +TRIO_ARGS2((self, buffer), + trio_string_t *self, + char *buffer) +{ + assert(self); + + trio_destroy(self->content); + self->content = trio_duplicate(buffer); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/* + * trio_string_size + */ +TRIO_STRING_PUBLIC int +trio_string_size +TRIO_ARGS1((self), + trio_string_t *self) +{ + assert(self); + + return self->allocated; +} + + +/* + * trio_string_terminate + */ +TRIO_STRING_PUBLIC void +trio_string_terminate +TRIO_ARGS1((self), + trio_string_t *self) +{ + trio_xstring_append_char(self, 0); +} + + +#if !defined(TRIO_MINIMAL) +/** + Append the second string to the first. + + @param self Dynamic string to be modified. + @param other Dynamic string to copy from. + @return Boolean value indicating success or failure. +*/ +TRIO_STRING_PUBLIC int +trio_string_append +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + size_t length; + + assert(self); + assert(other); + + length = self->length + other->length; + if (!TrioStringGrowTo(self, length)) + goto error; + trio_copy(&self->content[self->length], other->content); + self->length = length; + return TRUE; + + error: + return FALSE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_append + */ +TRIO_STRING_PUBLIC int +trio_xstring_append +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + size_t length; + + assert(self); + assert(other); + + length = self->length + trio_length(other); + if (!TrioStringGrowTo(self, length)) + goto error; + trio_copy(&self->content[self->length], other); + self->length = length; + return TRUE; + + error: + return FALSE; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/* + * trio_xstring_append_char + */ +TRIO_STRING_PUBLIC int +trio_xstring_append_char +TRIO_ARGS2((self, character), + trio_string_t *self, + char character) +{ + assert(self); + + if ((int)self->length >= trio_string_size(self)) + { + if 
(!TrioStringGrow(self, 0)) + goto error; + } + self->content[self->length] = character; + self->length++; + return TRUE; + + error: + return FALSE; +} + + +#if !defined(TRIO_MINIMAL) +/** + Search for the first occurrence of second parameter in the first. + + @param self Dynamic string to be modified. + @param other Dynamic string to copy from. + @return Boolean value indicating success or failure. +*/ +TRIO_STRING_PUBLIC int +trio_string_contains +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_contains(self->content, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_contains + */ +TRIO_STRING_PUBLIC int +trio_xstring_contains +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_contains(self->content, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_copy + */ +TRIO_STRING_PUBLIC int +trio_string_copy +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + self->length = 0; + return trio_string_append(self, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_copy + */ +TRIO_STRING_PUBLIC int +trio_xstring_copy +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + self->length = 0; + return trio_xstring_append(self, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_duplicate + */ +TRIO_STRING_PUBLIC trio_string_t * +trio_string_duplicate +TRIO_ARGS1((other), + trio_string_t *other) +{ + trio_string_t *self; + + assert(other); + + self = TrioStringAlloc(); + if (self) + { + self->content = TrioDuplicateMax(other->content, other->length); + if (self->content) + { + self->length = other->length; 
+ self->allocated = self->length + 1; + } + else + { + self->length = self->allocated = 0; + } + } + return self; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +/* + * trio_xstring_duplicate + */ +TRIO_STRING_PUBLIC trio_string_t * +trio_xstring_duplicate +TRIO_ARGS1((other), + TRIO_CONST char *other) +{ + trio_string_t *self; + + assert(other); + + self = TrioStringAlloc(); + if (self) + { + self->content = TrioDuplicateMax(other, trio_length(other)); + if (self->content) + { + self->length = trio_length(self->content); + self->allocated = self->length + 1; + } + else + { + self->length = self->allocated = 0; + } + } + return self; +} + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_equal + */ +TRIO_STRING_PUBLIC int +trio_string_equal +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_equal(self->content, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_equal + */ +TRIO_STRING_PUBLIC int +trio_xstring_equal +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_equal(self->content, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_equal_max + */ +TRIO_STRING_PUBLIC int +trio_string_equal_max +TRIO_ARGS3((self, max, other), + trio_string_t *self, + size_t max, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_equal_max(self->content, max, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_equal_max + */ +TRIO_STRING_PUBLIC int +trio_xstring_equal_max +TRIO_ARGS3((self, max, other), + trio_string_t *self, + size_t max, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_equal_max(self->content, max, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * 
trio_string_equal_case + */ +TRIO_STRING_PUBLIC int +trio_string_equal_case +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_equal_case(self->content, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_equal_case + */ +TRIO_STRING_PUBLIC int +trio_xstring_equal_case +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_equal_case(self->content, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_equal_case_max + */ +TRIO_STRING_PUBLIC int +trio_string_equal_case_max +TRIO_ARGS3((self, max, other), + trio_string_t *self, + size_t max, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_equal_case_max(self->content, max, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_equal_case_max + */ +TRIO_STRING_PUBLIC int +trio_xstring_equal_case_max +TRIO_ARGS3((self, max, other), + trio_string_t *self, + size_t max, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_equal_case_max(self->content, max, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_format_data_max + */ +TRIO_STRING_PUBLIC size_t +trio_string_format_date_max +TRIO_ARGS4((self, max, format, datetime), + trio_string_t *self, + size_t max, + TRIO_CONST char *format, + TRIO_CONST struct tm *datetime) +{ + assert(self); + + return trio_format_date_max(self->content, max, format, datetime); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_index + */ +TRIO_STRING_PUBLIC char * +trio_string_index +TRIO_ARGS2((self, character), + trio_string_t *self, + int character) +{ + assert(self); + + return trio_index(self->content, character); +} +#endif /* 
!defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_index_last + */ +TRIO_STRING_PUBLIC char * +trio_string_index_last +TRIO_ARGS2((self, character), + trio_string_t *self, + int character) +{ + assert(self); + + return trio_index_last(self->content, character); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_length + */ +TRIO_STRING_PUBLIC int +trio_string_length +TRIO_ARGS1((self), + trio_string_t *self) +{ + assert(self); + + if (self->length == 0) + { + self->length = trio_length(self->content); + } + return self->length; +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_lower + */ +TRIO_STRING_PUBLIC int +trio_string_lower +TRIO_ARGS1((self), + trio_string_t *self) +{ + assert(self); + + return trio_lower(self->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_match + */ +TRIO_STRING_PUBLIC int +trio_string_match +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_match(self->content, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_match + */ +TRIO_STRING_PUBLIC int +trio_xstring_match +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_match(self->content, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_match_case + */ +TRIO_STRING_PUBLIC int +trio_string_match_case +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_match_case(self->content, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_match_case + */ +TRIO_STRING_PUBLIC int +trio_xstring_match_case +TRIO_ARGS2((self, other), + trio_string_t *self, + 
TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_match_case(self->content, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_substring + */ +TRIO_STRING_PUBLIC char * +trio_string_substring +TRIO_ARGS2((self, other), + trio_string_t *self, + trio_string_t *other) +{ + assert(self); + assert(other); + + return trio_substring(self->content, other->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_xstring_substring + */ +TRIO_STRING_PUBLIC char * +trio_xstring_substring +TRIO_ARGS2((self, other), + trio_string_t *self, + TRIO_CONST char *other) +{ + assert(self); + assert(other); + + return trio_substring(self->content, other); +} +#endif /* !defined(TRIO_MINIMAL) */ + + +#if !defined(TRIO_MINIMAL) +/* + * trio_string_upper + */ +TRIO_STRING_PUBLIC int +trio_string_upper +TRIO_ARGS1((self), + trio_string_t *self) +{ + assert(self); + + return trio_upper(self->content); +} +#endif /* !defined(TRIO_MINIMAL) */ + +/** @} End of DynamicStrings */ diff --git a/bundle/libxml/triostr.h b/bundle/libxml/triostr.h new file mode 100644 index 0000000000..083e2a52ff --- /dev/null +++ b/bundle/libxml/triostr.h @@ -0,0 +1,127 @@ +/************************************************************************* + * + * $Id$ + * + * Copyright (C) 2001 Bjorn Reese and Daniel Stenberg. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + ************************************************************************/ + +#ifndef TRIO_TRIOSTR_H +#define TRIO_TRIOSTR_H + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "triodef.h" +#include "triop.h" + +enum { + TRIO_HASH_NONE = 0, + TRIO_HASH_PLAIN, + TRIO_HASH_TWOSIGNED +}; + +#if !defined(TRIO_STRING_PUBLIC) +# if !defined(TRIO_PUBLIC) +# define TRIO_PUBLIC +# endif +# define TRIO_STRING_PUBLIC TRIO_PUBLIC +#endif + +/************************************************************************* + * String functions + */ + +TRIO_STRING_PUBLIC int trio_append TRIO_PROTO((char *target, const char *source)); +TRIO_STRING_PUBLIC int trio_append_max TRIO_PROTO((char *target, size_t max, const char *source)); +TRIO_STRING_PUBLIC int trio_contains TRIO_PROTO((const char *string, const char *substring)); +TRIO_STRING_PUBLIC int trio_copy TRIO_PROTO((char *target, const char *source)); +TRIO_STRING_PUBLIC int trio_copy_max TRIO_PROTO((char *target, size_t max, const char *source)); +TRIO_STRING_PUBLIC char *trio_create TRIO_PROTO((size_t size)); +TRIO_STRING_PUBLIC void trio_destroy TRIO_PROTO((char *string)); +TRIO_STRING_PUBLIC char *trio_duplicate TRIO_PROTO((const char *source)); +TRIO_STRING_PUBLIC char *trio_duplicate_max TRIO_PROTO((const char *source, size_t max)); +TRIO_STRING_PUBLIC int trio_equal TRIO_PROTO((const char *first, const char *second)); +TRIO_STRING_PUBLIC int trio_equal_case TRIO_PROTO((const char *first, const char *second)); +TRIO_STRING_PUBLIC int trio_equal_case_max TRIO_PROTO((const char *first, size_t max, const char *second)); +TRIO_STRING_PUBLIC int trio_equal_locale TRIO_PROTO((const char *first, const char *second)); +TRIO_STRING_PUBLIC int trio_equal_max TRIO_PROTO((const char *first, size_t max, const char *second)); +TRIO_STRING_PUBLIC TRIO_CONST char *trio_error TRIO_PROTO((int)); +TRIO_STRING_PUBLIC size_t trio_format_date_max TRIO_PROTO((char *target, size_t max, const char 
*format, const struct tm *datetime)); +TRIO_STRING_PUBLIC unsigned long trio_hash TRIO_PROTO((const char *string, int type)); +TRIO_STRING_PUBLIC char *trio_index TRIO_PROTO((const char *string, int character)); +TRIO_STRING_PUBLIC char *trio_index_last TRIO_PROTO((const char *string, int character)); +TRIO_STRING_PUBLIC size_t trio_length TRIO_PROTO((const char *string)); +TRIO_STRING_PUBLIC int trio_lower TRIO_PROTO((char *target)); +TRIO_STRING_PUBLIC int trio_match TRIO_PROTO((const char *string, const char *pattern)); +TRIO_STRING_PUBLIC int trio_match_case TRIO_PROTO((const char *string, const char *pattern)); +TRIO_STRING_PUBLIC size_t trio_span_function TRIO_PROTO((char *target, const char *source, int (*Function) TRIO_PROTO((int)))); +TRIO_STRING_PUBLIC char *trio_substring TRIO_PROTO((const char *string, const char *substring)); +TRIO_STRING_PUBLIC char *trio_substring_max TRIO_PROTO((const char *string, size_t max, const char *substring)); +TRIO_STRING_PUBLIC double trio_to_double TRIO_PROTO((const char *source, char **endp)); +TRIO_STRING_PUBLIC float trio_to_float TRIO_PROTO((const char *source, char **endp)); +TRIO_STRING_PUBLIC trio_long_double_t trio_to_long_double TRIO_PROTO((const char *source, char **endp)); +TRIO_STRING_PUBLIC long trio_to_long TRIO_PROTO((const char *source, char **endp, int base)); +TRIO_STRING_PUBLIC unsigned long trio_to_unsigned_long TRIO_PROTO((const char *source, char **endp, int base)); +TRIO_STRING_PUBLIC char *trio_tokenize TRIO_PROTO((char *string, const char *delimiters)); +TRIO_STRING_PUBLIC int trio_upper TRIO_PROTO((char *target)); + +/************************************************************************* + * Dynamic string functions + */ + +/* + * Opaque type for dynamic strings + */ +typedef struct _trio_string_t trio_string_t; + +TRIO_STRING_PUBLIC trio_string_t *trio_string_create TRIO_PROTO((int initial_size)); +TRIO_STRING_PUBLIC void trio_string_destroy TRIO_PROTO((trio_string_t *self)); 
+TRIO_STRING_PUBLIC char *trio_string_get TRIO_PROTO((trio_string_t *self, int offset)); +TRIO_STRING_PUBLIC void trio_xstring_set TRIO_PROTO((trio_string_t *self, char *buffer)); +TRIO_STRING_PUBLIC char *trio_string_extract TRIO_PROTO((trio_string_t *self)); +TRIO_STRING_PUBLIC int trio_string_size TRIO_PROTO((trio_string_t *self)); +TRIO_STRING_PUBLIC void trio_string_terminate TRIO_PROTO((trio_string_t *self)); + +TRIO_STRING_PUBLIC int trio_string_append TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_contains TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_copy TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC trio_string_t *trio_string_duplicate TRIO_PROTO((trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_equal TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_equal_max TRIO_PROTO((trio_string_t *self, size_t max, trio_string_t *second)); +TRIO_STRING_PUBLIC int trio_string_equal_case TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_equal_case_max TRIO_PROTO((trio_string_t *self, size_t max, trio_string_t *other)); +TRIO_STRING_PUBLIC size_t trio_string_format_date_max TRIO_PROTO((trio_string_t *self, size_t max, const char *format, const struct tm *datetime)); +TRIO_STRING_PUBLIC char *trio_string_index TRIO_PROTO((trio_string_t *self, int character)); +TRIO_STRING_PUBLIC char *trio_string_index_last TRIO_PROTO((trio_string_t *self, int character)); +TRIO_STRING_PUBLIC int trio_string_length TRIO_PROTO((trio_string_t *self)); +TRIO_STRING_PUBLIC int trio_string_lower TRIO_PROTO((trio_string_t *self)); +TRIO_STRING_PUBLIC int trio_string_match TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_match_case TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC char 
*trio_string_substring TRIO_PROTO((trio_string_t *self, trio_string_t *other)); +TRIO_STRING_PUBLIC int trio_string_upper TRIO_PROTO((trio_string_t *self)); + +TRIO_STRING_PUBLIC int trio_xstring_append_char TRIO_PROTO((trio_string_t *self, char character)); +TRIO_STRING_PUBLIC int trio_xstring_append TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_contains TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_copy TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC trio_string_t *trio_xstring_duplicate TRIO_PROTO((const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_equal TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_equal_max TRIO_PROTO((trio_string_t *self, size_t max, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_equal_case TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_equal_case_max TRIO_PROTO((trio_string_t *self, size_t max, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_match TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC int trio_xstring_match_case TRIO_PROTO((trio_string_t *self, const char *other)); +TRIO_STRING_PUBLIC char *trio_xstring_substring TRIO_PROTO((trio_string_t *self, const char *other)); + +#endif /* TRIO_TRIOSTR_H */ diff --git a/bundle/libxml/uri.c b/bundle/libxml/uri.c new file mode 100644 index 0000000000..1f4ab600bd --- /dev/null +++ b/bundle/libxml/uri.c @@ -0,0 +1,1962 @@ +/** + * uri.c: set of generic URI related routines + * + * Reference: RFC 2396 + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> + +#include <libxml/xmlmemory.h> +#include <libxml/uri.h> +#include <libxml/globals.h> +#include <libxml/xmlerror.h> + +/************************************************************************ + * * + * Macros to differentiate various character type * + * directly extracted from RFC 2396 * + * * + ************************************************************************/ + +/* + * alpha = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + * "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + * "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ + +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | + * "a" | "b" | "c" | "d" | "e" | "f" + */ + +#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \ + (((x) >= 'A') && ((x) <= 'F'))) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ + ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ + ((x) == '(') || ((x) == ')')) + + +/* + * reserved = ";" | "/" | "?" 
| ":" | "@" | "&" | "=" | "+" | "$" | "," + */ + +#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ + ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ + ((x) == '+') || ((x) == '$') || ((x) == ',')) + +/* + * unreserved = alphanum | mark + */ + +#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) + +/* + * escaped = "%" hex hex + */ + +#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \ + (IS_HEX((p)[2]))) + +/* + * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | + * "&" | "=" | "+" | "$" | "," + */ +#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\ + ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\ + ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\ + ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) + +/* + * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," + */ +#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\ + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\ + ((*(p) == ','))) + +/* + * rel_segment = 1*( unreserved | escaped | + * ";" | "@" | "&" | "=" | "+" | "$" | "," ) + */ + +#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \ + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ + ((*(p) == ','))) + +/* + * scheme = alpha *( alpha | digit | "+" | "-" | "." 
) + */ + +#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \ + ((x) == '+') || ((x) == '-') || ((x) == '.')) + +/* + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + */ + +#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \ + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \ + ((*(p) == '=')) || ((*(p) == '+'))) + +/* + * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" | + * "+" | "$" | "," ) + */ +#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \ + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ + ((*(p) == ','))) + +/* + * uric = reserved | unreserved | escaped + */ + +#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + (IS_RESERVED(*(p)))) + +/* +* unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" +*/ + +#define IS_UNWISE(p) \ + (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ + ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ + ((*(p) == ']')) || ((*(p) == '`'))) + +/* + * Skip to next pointer char, handle escaped sequences + */ + +#define NEXT(p) ((*p == '%')? p += 3 : p++) + +/* + * Productions from the spec. 
+ * + * authority = server | reg_name + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + * + * path = [ abs_path | opaque_part ] + */ + +/************************************************************************ + * * + * Generic URI structure functions * + * * + ************************************************************************/ + +/** + * xmlCreateURI: + * + * Simply creates an empty xmlURI + * + * Returns the new structure or NULL in case of error + */ +xmlURIPtr +xmlCreateURI(void) { + xmlURIPtr ret; + + ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCreateURI: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlURI)); + return(ret); +} + +/** + * xmlSaveUri: + * @uri: pointer to an xmlURI + * + * Save the URI as an escaped string + * + * Returns a new string (to be deallocated by caller) + */ +xmlChar * +xmlSaveUri(xmlURIPtr uri) { + xmlChar *ret = NULL; + const char *p; + int len; + int max; + + if (uri == NULL) return(NULL); + + + max = 80; + ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + len = 0; + + if (uri->scheme != NULL) { + p = uri->scheme; + while (*p != 0) { + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = *p++; + } + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = ':'; + } + if (uri->opaque != NULL) { + p = uri->opaque; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * 
sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) || + ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else { + if (uri->server != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '/'; + ret[len++] = '/'; + if (uri->user != NULL) { + p = uri->user; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == ':')) || + ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); + } + } + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '@'; + } + p = uri->server; + while (*p != 0) { + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = *p++; + } + if (uri->port > 0) { + if (len + 10 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); + } + } else if (uri->authority != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '/'; + ret[len++] = '/'; + p = uri->authority; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+'))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); + } + } + } else if (uri->scheme != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '/'; + ret[len++] = '/'; + } + if (uri->path != NULL) { + p = uri->path; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (uri->query != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '?'; + p = uri->query; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); + } + } + } + } + if (uri->fragment != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '#'; + p = uri->fragment; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = 0; + return(ret); +} + +/** + * xmlPrintURI: + * @stream: a FILE* for the output + * @uri: pointer to an xmlURI + * + * Prints the URI in the stream @steam. 
+ */ +void +xmlPrintURI(FILE *stream, xmlURIPtr uri) { + xmlChar *out; + + out = xmlSaveUri(uri); + if (out != NULL) { + fprintf(stream, "%s", out); + xmlFree(out); + } +} + +/** + * xmlCleanURI: + * @uri: pointer to an xmlURI + * + * Make sure the xmlURI struct is free of content + */ +static void +xmlCleanURI(xmlURIPtr uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) xmlFree(uri->scheme); + uri->scheme = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = NULL; + if (uri->user != NULL) xmlFree(uri->user); + uri->user = NULL; + if (uri->path != NULL) xmlFree(uri->path); + uri->path = NULL; + if (uri->fragment != NULL) xmlFree(uri->fragment); + uri->fragment = NULL; + if (uri->opaque != NULL) xmlFree(uri->opaque); + uri->opaque = NULL; + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->query != NULL) xmlFree(uri->query); + uri->query = NULL; +} + +/** + * xmlFreeURI: + * @uri: pointer to an xmlURI + * + * Free up the xmlURI struct + */ +void +xmlFreeURI(xmlURIPtr uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) xmlFree(uri->scheme); + if (uri->server != NULL) xmlFree(uri->server); + if (uri->user != NULL) xmlFree(uri->user); + if (uri->path != NULL) xmlFree(uri->path); + if (uri->fragment != NULL) xmlFree(uri->fragment); + if (uri->opaque != NULL) xmlFree(uri->opaque); + if (uri->authority != NULL) xmlFree(uri->authority); + if (uri->query != NULL) xmlFree(uri->query); + xmlFree(uri); +} + +/************************************************************************ + * * + * Helper functions * + * * + ************************************************************************/ + +/** + * xmlNormalizeURIPath: + * @path: pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. 
+ * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +int +xmlNormalizeURIPath(char *path) { + char *cur, *out; + + if (path == NULL) + return(-1); + + /* Skip all initial "/" chars. We want to get to the beginning of the + * first non-empty segment. + */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* Keep everything we've seen so far. */ + out = cur; + + /* + * Analyze each segment in sequence for cases (c) and (d). + */ + while (cur[0] != '\0') { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. + */ + if ((cur[0] == '.') && (cur[1] == '/')) { + cur += 2; + /* '//' normalization should be done at this point too */ + while (cur[0] == '/') + cur++; + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((cur[0] == '.') && (cur[1] == '\0')) + break; + + /* Otherwise keep the segment. */ + while (cur[0] != '/') { + if (cur[0] == '\0') + goto done_cd; + (out++)[0] = (cur++)[0]; + } + /* nomalize // */ + while ((cur[0] == '/') && (cur[1] == '/')) + cur++; + + (out++)[0] = (cur++)[0]; + } + done_cd: + out[0] = '\0'; + + /* Reset to the beginning of the first segment for the next sequence. */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* + * Analyze each segment in sequence for cases (e) and (f). + * + * e) All occurrences of "<segment>/../", where <segment> is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + * + * f) If the buffer string ends with "<segment>/..", where <segment> + * is a complete path segment not equal to "..", that + * "<segment>/.." is removed. 
+ * + * To satisfy the "iterative" clause in (e), we need to collapse the + * string every time we find something that needs to be removed. Thus, + * we don't need to keep two pointers into the string: we only need a + * "current position" pointer. + */ + while (1) { + char *segp; + + /* At the beginning of each iteration of this loop, "cur" points to + * the first character of the segment we want to examine. + */ + + /* Find the end of the current segment. */ + segp = cur; + while ((segp[0] != '/') && (segp[0] != '\0')) + ++segp; + + /* If this is the last segment, we're done (we need at least two + * segments to meet the criteria for the (e) and (f) cases). + */ + if (segp[0] == '\0') + break; + + /* If the first segment is "..", or if the next segment _isn't_ "..", + * keep this segment and try the next one. + */ + ++segp; + if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) + || ((segp[0] != '.') || (segp[1] != '.') + || ((segp[2] != '/') && (segp[2] != '\0')))) { + cur = segp; + continue; + } + + /* If we get here, remove this segment and the next one and back up + * to the previous segment (if there is one), to implement the + * "iteratively" clause. It's pretty much impossible to back up + * while maintaining two pointers into the buffer, so just compact + * the whole buffer now. + */ + + /* If this is the end of the buffer, we're done. */ + if (segp[2] == '\0') { + cur[0] = '\0'; + break; + } + strcpy(cur, segp + 3); + + /* If there are no previous segments, then keep going from here. */ + segp = cur; + while ((segp > path) && ((--segp)[0] == '/')) + ; + if (segp == path) + continue; + + /* "segp" is pointing to the end of a previous segment; find it's + * start. We need to back up to the previous segment and start + * over with that to handle things like "foo/bar/../..". If we + * don't do this, then on the first pass we'll remove the "bar/..", + * but be pointing at the second ".." so we won't realize we can also + * remove the "foo/..". 
+ */ + cur = segp; + while ((cur > path) && (cur[-1] != '/')) + --cur; + } + out[0] = '\0'; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. + */ + if (path[0] == '/') { + cur = path; + while ((cur[1] == '.') && (cur[2] == '.') + && ((cur[3] == '/') || (cur[3] == '\0'))) + cur += 3; + + if (cur != path) { + out = path; + while (cur[0] != '\0') + (out++)[0] = (cur++)[0]; + out[0] = 0; + } + } + + return(0); +} + +/** + * xmlURIUnescapeString: + * @str: the string to unescape + * @len: the length in bytes to unescape (or <= 0 to indicate full string) + * @target: optional destination buffer + * + * Unescaping routine, does not do validity checks ! 
+ * Output is direct unsigned char translation of %XX values (no encoding) + * + * Returns an copy of the string, but unescaped + */ +char * +xmlURIUnescapeString(const char *str, int len, char *target) { + char *ret, *out; + const char *in; + + if (str == NULL) + return(NULL); + if (len <= 0) len = strlen(str); + if (len <= 0) return(NULL); + + if (target == NULL) { + ret = (char *) xmlMalloc(len + 1); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlURIUnescapeString: out of memory\n"); + return(NULL); + } + } else + ret = target; + in = str; + out = ret; + while(len > 0) { + if (*in == '%') { + in++; + if ((*in >= '0') && (*in <= '9')) + *out = (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = (*in - 'A') + 10; + in++; + if ((*in >= '0') && (*in <= '9')) + *out = *out * 16 + (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = *out * 16 + (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = *out * 16 + (*in - 'A') + 10; + in++; + len -= 3; + out++; + } else { + *out++ = *in++; + len--; + } + } + *out = 0; + return(ret); +} + +/** + * xmlURIEscapeStr: + * @str: string to escape + * @list: exception list string of chars not to escape + * + * This routine escapes a string to hex, ignoring reserved characters (a-z) + * and the characters in the exception list. + * + * Returns a new escaped string or NULL in case of error. 
+ */ +xmlChar * +xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { + xmlChar *ret, ch; + const xmlChar *in; + + unsigned int len, out; + + if (str == NULL) + return(NULL); + len = xmlStrlen(str); + if (!(len > 0)) return(NULL); + + len += 20; + ret = (xmlChar *) xmlMalloc(len); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlURIEscapeStr: out of memory\n"); + return(NULL); + } + in = (const xmlChar *) str; + out = 0; + while(*in != 0) { + if (len - out <= 3) { + len += 20; + ret = (xmlChar *) xmlRealloc(ret, len); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlURIEscapeStr: out of memory\n"); + return(NULL); + } + } + + ch = *in; + + if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { + unsigned char val; + ret[out++] = '%'; + val = ch >> 4; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + val = ch & 0xF; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + in++; + } else { + ret[out++] = *in++; + } + + } + ret[out] = 0; + return(ret); +} + +/** + * xmlURIEscape: + * @str: the string of the URI to escape + * + * Escaping routine, does not do validity checks ! + * It will try to escape the chars needing this, but this is heuristic + * based it's impossible to be sure. + * + * Returns an copy of the string, but escaped + * + * 25 May 2001 + * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly + * according to RFC2396. 
+ * - Carl Douglas + */ +xmlChar * +xmlURIEscape(const xmlChar * str) +{ + xmlChar *ret, *segment = NULL; + xmlURIPtr uri; + int ret2; + +#define NULLCHK(p) if(!p) { \ + xmlGenericError(xmlGenericErrorContext, \ + "xmlURIEscape: out of memory\n"); \ + return NULL; } + + if (str == NULL) + return (NULL); + + uri = xmlCreateURI(); + if (uri != NULL) { + /* + * Allow escaping errors in the unescaped form + */ + uri->cleanup = 1; + ret2 = xmlParseURIReference(uri, (const char *)str); + if (ret2) { + xmlFreeURI(uri); + return (NULL); + } + } + + if (!uri) + return NULL; + + ret = NULL; + + if (uri->scheme) { + segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); + NULLCHK(segment) + ret = xmlStrcat(ret, segment); + ret = xmlStrcat(ret, BAD_CAST ":"); + xmlFree(segment); + } + + if (uri->authority) { + segment = + xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); + NULLCHK(segment) + ret = xmlStrcat(ret, BAD_CAST "//"); + ret = xmlStrcat(ret, segment); + xmlFree(segment); + } + + if (uri->user) { + segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); + NULLCHK(segment) + ret = xmlStrcat(ret, segment); + ret = xmlStrcat(ret, BAD_CAST "@"); + xmlFree(segment); + } + + if (uri->server) { + segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); + NULLCHK(segment) + ret = xmlStrcat(ret, BAD_CAST "//"); + ret = xmlStrcat(ret, segment); + xmlFree(segment); + } + + if (uri->port) { + xmlChar port[10]; + + snprintf((char *) port, 10, "%d", uri->port); + ret = xmlStrcat(ret, BAD_CAST ":"); + ret = xmlStrcat(ret, port); + } + + if (uri->path) { + segment = + xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); + NULLCHK(segment) + ret = xmlStrcat(ret, segment); + xmlFree(segment); + } + + if (uri->query) { + segment = + xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); + NULLCHK(segment) + ret = xmlStrcat(ret, BAD_CAST "?"); + ret = xmlStrcat(ret, segment); + xmlFree(segment); + } + + if (uri->opaque) { + segment = 
xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); + NULLCHK(segment) + ret = xmlStrcat(ret, segment); + xmlFree(segment); + } + + if (uri->fragment) { + segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); + NULLCHK(segment) + ret = xmlStrcat(ret, BAD_CAST "#"); + ret = xmlStrcat(ret, segment); + xmlFree(segment); + } + + xmlFreeURI(uri); +#undef NULLCHK + + return (ret); +} + +/************************************************************************ + * * + * Escaped URI parsing * + * * + ************************************************************************/ + +/** + * xmlParseURIFragment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI fragment string and fills in the appropriate fields + * of the @uri structure. + * + * fragment = *uric + * + * Returns 0 or the error code + */ +static int +xmlParseURIFragment(xmlURIPtr uri, const char **str) +{ + const char *cur = *str; + + if (str == NULL) + return (-1); + + while (IS_URIC(cur) || IS_UNWISE(cur)) + NEXT(cur); + if (uri != NULL) { + if (uri->fragment != NULL) + xmlFree(uri->fragment); + uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * xmlParseURIQuery: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +static int +xmlParseURIQuery(xmlURIPtr uri, const char **str) +{ + const char *cur = *str; + + if (str == NULL) + return (-1); + + while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->query != NULL) + xmlFree(uri->query); + uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * xmlParseURIScheme: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI scheme + * + * scheme = alpha *( alpha | digit | "+" | "-" 
| "." ) + * + * Returns 0 or the error code + */ +static int +xmlParseURIScheme(xmlURIPtr uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!IS_ALPHA(*cur)) + return(2); + cur++; + while (IS_SCHEME(*cur)) cur++; + if (uri != NULL) { + if (uri->scheme != NULL) xmlFree(uri->scheme); + /* !!! strndup */ + uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIOpaquePart: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI opaque part + * + * opaque_part = uric_no_slash *uric + * + * Returns 0 or the error code + */ +static int +xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) { + return (3); + } + NEXT(cur); + while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->opaque != NULL) + xmlFree(uri->opaque); + uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * xmlParseURIServer: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse a server subpart of an URI, it's a finer grain analysis + * of the authority part. + * + * server = [ [ userinfo "@" ] hostport ] + * userinfo = *( unreserved | escaped | + * ";" | ":" | "&" | "=" | "+" | "$" | "," ) + * hostport = host [ ":" port ] + * host = hostname | IPv4address + * hostname = *( domainlabel "." ) toplabel [ "." ] + * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + * toplabel = alpha | alpha *( alphanum | "-" ) alphanum + * IPv4address = 1*digit "." 1*digit "." 1*digit "." 
1*digit + * port = *digit + * + * Returns 0 or the error code + */ +static int +xmlParseURIServer(xmlURIPtr uri, const char **str) { + const char *cur; + const char *host, *tmp; + + if (str == NULL) + return(-1); + + cur = *str; + + /* + * is there an userinfo ? + */ + while (IS_USERINFO(cur)) NEXT(cur); + if (*cur == '@') { + if (uri != NULL) { + if (uri->user != NULL) xmlFree(uri->user); + uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); + } + cur++; + } else { + if (uri != NULL) { + if (uri->user != NULL) xmlFree(uri->user); + uri->user = NULL; + } + cur = *str; + } + /* + * This can be empty in the case where there is no server + */ + host = cur; + if (*cur == '/') { + if (uri != NULL) { + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = NULL; + uri->port = 0; + } + return(0); + } + /* + * host part of hostport can derive either an IPV4 address + * or an unresolved name. Check the IP first, it easier to detect + * errors if wrong one + */ + if (IS_DIGIT(*cur)) { + while(IS_DIGIT(*cur)) cur++; + if (*cur != '.') + goto host_name; + cur++; + if (!IS_DIGIT(*cur)) + goto host_name; + while(IS_DIGIT(*cur)) cur++; + if (*cur != '.') + goto host_name; + cur++; + if (!IS_DIGIT(*cur)) + goto host_name; + while(IS_DIGIT(*cur)) cur++; + if (*cur != '.') + goto host_name; + cur++; + if (!IS_DIGIT(*cur)) + goto host_name; + while(IS_DIGIT(*cur)) cur++; + if (uri != NULL) { + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = xmlURIUnescapeString(host, cur - host, NULL); + } + goto host_done; + } +host_name: + /* + * the hostname production as-is is a parser nightmare. + * simplify it to + * hostname = *( domainlabel "." ) domainlabel [ "." ] + * and just make sure the last label starts with a non numeric char. 
+ */ + if (!IS_ALPHANUM(*cur)) + return(6); + while (IS_ALPHANUM(*cur)) { + while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++; + if (*cur == '.') + cur++; + } + tmp = cur; + tmp--; + while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--; + tmp++; + if (!IS_ALPHA(*tmp)) + return(7); + if (uri != NULL) { + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = xmlURIUnescapeString(host, cur - host, NULL); + } + +host_done: + + /* + * finish by checking for a port presence. + */ + if (*cur == ':') { + cur++; + if (IS_DIGIT(*cur)) { + if (uri != NULL) + uri->port = 0; + while (IS_DIGIT(*cur)) { + if (uri != NULL) + uri->port = uri->port * 10 + (*cur - '0'); + cur++; + } + } + } + *str = cur; + return(0); +} + +/** + * xmlParseURIRelSegment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI relative segment + * + * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" | + * "+" | "$" | "," ) + * + * Returns 0 or the error code + */ +static int +xmlParseURIRelSegment(xmlURIPtr uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + if (!(IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) { + return (3); + } + NEXT(cur); + while (IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->path != NULL) + xmlFree(uri->path); + uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * xmlParseURIPathSegments: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * @slash: should we add a leading slash + * + * Parse an URI set of path segments + * + * path_segments = segment *( "/" segment ) + * segment = *pchar *( ";" param ) + * param = *pchar + * + * Returns 0 or the error code + */ +static int +xmlParseURIPathSegments(xmlURIPtr uri, const char 
**str, int slash) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + do { + while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + NEXT(cur); + while (*cur == ';') { + cur++; + while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + NEXT(cur); + } + if (*cur != '/') + break; + cur++; + } while (1); + if (uri != NULL) { + int len, len2 = 0; + char *path; + + /* + * Concat the set of path segments to the current path + */ + len = cur - *str; + if (slash) + len++; + + if (uri->path != NULL) { + len2 = strlen(uri->path); + len += len2; + } + path = (char *) xmlMalloc(len + 1); + if (path == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlParseURIPathSegments: out of memory\n"); + *str = cur; + return (-1); + } + if (uri->path != NULL) + memcpy(path, uri->path, len2); + if (slash) { + path[len2] = '/'; + len2++; + } + path[len2] = 0; + if (cur - *str > 0) + xmlURIUnescapeString(*str, cur - *str, &path[len2]); + if (uri->path != NULL) + xmlFree(uri->path); + uri->path = path; + } + *str = cur; + return (0); +} + +/** + * xmlParseURIAuthority: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the authority part of an URI. + * + * authority = server | reg_name + * server = [ [ userinfo "@" ] hostport ] + * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" | + * "@" | "&" | "=" | "+" ) + * + * Note : this is completely ambiguous since reg_name is allowed to + * use the full set of chars in use by server: + * + * 3.2.1. Registry-based Naming Authority + * + * The structure of a registry-based naming authority is specific + * to the URI scheme, but constrained to the allowed characters + * for an authority component. + * + * Returns 0 or the error code + */ +static int +xmlParseURIAuthority(xmlURIPtr uri, const char **str) { + const char *cur; + int ret; + + if (str == NULL) + return(-1); + + cur = *str; + + /* + * try first to parse it as a server string. 
+ */ + ret = xmlParseURIServer(uri, str); + if (ret == 0) + return(0); + + /* + * failed, fallback to reg_name + */ + if (!IS_REG_NAME(cur)) { + return(5); + } + NEXT(cur); + while (IS_REG_NAME(cur)) NEXT(cur); + if (uri != NULL) { + if (uri->server != NULL) xmlFree(uri->server); + uri->server = NULL; + if (uri->user != NULL) xmlFree(uri->user); + uri->user = NULL; + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIHierPart: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI hierarchical part + * + * hier_part = ( net_path | abs_path ) [ "?" query ] + * abs_path = "/" path_segments + * net_path = "//" authority [ abs_path ] + * + * Returns 0 or the error code + */ +static int +xmlParseURIHierPart(xmlURIPtr uri, const char **str) { + int ret; + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + + if ((cur[0] == '/') && (cur[1] == '/')) { + cur += 2; + ret = xmlParseURIAuthority(uri, &cur); + if (ret != 0) + return(ret); + if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } + } else if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } else { + return(4); + } + if (ret != 0) + return(ret); + if (*cur == '?') { + cur++; + ret = xmlParseURIQuery(uri, &cur); + if (ret != 0) + return(ret); + } + *str = cur; + return(0); +} + +/** + * xmlParseAbsoluteURI: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * absoluteURI = scheme ":" ( hier_part | opaque_part ) + * + * Returns 0 or the error code + */ +static int +xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) { + int ret; + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + + ret = xmlParseURIScheme(uri, str); + if 
(ret != 0) return(ret); + if (**str != ':') { + *str = cur; + return(1); + } + (*str)++; + if (**str == '/') + return(xmlParseURIHierPart(uri, str)); + return(xmlParseURIOpaquePart(uri, str)); +} + +/** + * xmlParseRelativeURI: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an relative URI string and fills in the appropriate fields + * of the @uri structure + * + * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + * abs_path = "/" path_segments + * net_path = "//" authority [ abs_path ] + * rel_path = rel_segment [ abs_path ] + * + * Returns 0 or the error code + */ +static int +xmlParseRelativeURI(xmlURIPtr uri, const char **str) { + int ret = 0; + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if ((cur[0] == '/') && (cur[1] == '/')) { + cur += 2; + ret = xmlParseURIAuthority(uri, &cur); + if (ret != 0) + return(ret); + if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } + } else if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } else if (cur[0] != '#' && cur[0] != '?') { + ret = xmlParseURIRelSegment(uri, &cur); + if (ret != 0) + return(ret); + if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } + } + if (ret != 0) + return(ret); + if (*cur == '?') { + cur++; + ret = xmlParseURIQuery(uri, &cur); + if (ret != 0) + return(ret); + } + *str = cur; + return(ret); +} + +/** + * xmlParseURIReference: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +int +xmlParseURIReference(xmlURIPtr uri, const char *str) { + int ret; + const char *tmp = str; + + if (str == NULL) + return(-1); + xmlCleanURI(uri); + + /* + * Try first to parse absolute refs, then fallback to relative if 
+ * it fails. + */ + ret = xmlParseAbsoluteURI(uri, &str); + if (ret != 0) { + xmlCleanURI(uri); + str = tmp; + ret = xmlParseRelativeURI(uri, &str); + } + if (ret != 0) { + xmlCleanURI(uri); + return(ret); + } + + if (*str == '#') { + str++; + ret = xmlParseURIFragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + xmlCleanURI(uri); + return(1); + } + return(0); +} + +/** + * xmlParseURI: + * @str: the URI string to analyze + * + * Parse an URI + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly build xmlURIPtr or NULL in case of error + */ +xmlURIPtr +xmlParseURI(const char *str) { + xmlURIPtr uri; + int ret; + + if (str == NULL) + return(NULL); + uri = xmlCreateURI(); + if (uri != NULL) { + ret = xmlParseURIReference(uri, str); + if (ret) { + xmlFreeURI(uri); + return(NULL); + } + } + return(uri); +} + +/************************************************************************ + * * + * Public functions * + * * + ************************************************************************/ + +/** + * xmlBuildURI: + * @URI: the URI instance found in the document + * @base: the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * of error. + */ +xmlChar * +xmlBuildURI(const xmlChar *URI, const xmlChar *base) { + xmlChar *val = NULL; + int ret, len, indx, cur, out; + xmlURIPtr ref = NULL; + xmlURIPtr bas = NULL; + xmlURIPtr res = NULL; + + /* + * 1) The URI reference is parsed into the potential four components and + * fragment identifier, as described in Section 4.3. + * + * NOTE that a completely empty URI is treated by modern browsers + * as a reference to "." 
rather than as a synonym for the current + * URI. Should we do that here? + */ + if (URI == NULL) + ret = -1; + else { + if (*URI) { + ref = xmlCreateURI(); + if (ref == NULL) + goto done; + ret = xmlParseURIReference(ref, (const char *) URI); + } + else + ret = 0; + } + if (ret != 0) + goto done; + if (base == NULL) + ret = -1; + else { + bas = xmlCreateURI(); + if (bas == NULL) + goto done; + ret = xmlParseURIReference(bas, (const char *) base); + } + if (ret != 0) { + if (ref) + val = xmlSaveUri(ref); + goto done; + } + if (ref == NULL) { + /* + * the base fragment must be ignored + */ + if (bas->fragment != NULL) { + xmlFree(bas->fragment); + bas->fragment = NULL; + } + val = xmlSaveUri(bas); + goto done; + } + + /* + * 2) If the path component is empty and the scheme, authority, and + * query components are undefined, then it is a reference to the + * current document and we are done. Otherwise, the reference URI's + * query and fragment components are defined as found (or not found) + * within the URI reference and not inherited from the base URI. + * + * NOTE that in modern browsers, the parsing differs from the above + * in the following aspect: the query component is allowed to be + * defined while still treating this as a reference to the current + * document. 
+ */ + res = xmlCreateURI(); + if (res == NULL) + goto done; + if ((ref->scheme == NULL) && (ref->path == NULL) && + ((ref->authority == NULL) && (ref->server == NULL))) { + if (bas->scheme != NULL) + res->scheme = xmlMemStrdup(bas->scheme); + if (bas->authority != NULL) + res->authority = xmlMemStrdup(bas->authority); + else if (bas->server != NULL) { + res->server = xmlMemStrdup(bas->server); + if (bas->user != NULL) + res->user = xmlMemStrdup(bas->user); + res->port = bas->port; + } + if (bas->path != NULL) + res->path = xmlMemStrdup(bas->path); + if (ref->query != NULL) + res->query = xmlMemStrdup(ref->query); + else if (bas->query != NULL) + res->query = xmlMemStrdup(bas->query); + if (ref->fragment != NULL) + res->fragment = xmlMemStrdup(ref->fragment); + goto step_7; + } + + if (ref->query != NULL) + res->query = xmlMemStrdup(ref->query); + if (ref->fragment != NULL) + res->fragment = xmlMemStrdup(ref->fragment); + + /* + * 3) If the scheme component is defined, indicating that the reference + * starts with a scheme name, then the reference is interpreted as an + * absolute URI and we are done. Otherwise, the reference URI's + * scheme is inherited from the base URI's scheme component. + */ + if (ref->scheme != NULL) { + val = xmlSaveUri(ref); + goto done; + } + if (bas->scheme != NULL) + res->scheme = xmlMemStrdup(bas->scheme); + + /* + * 4) If the authority component is defined, then the reference is a + * network-path and we skip to step 7. Otherwise, the reference + * URI's authority is inherited from the base URI's authority + * component, which will also be undefined if the URI scheme does not + * use an authority component. 
+ */ + if ((ref->authority != NULL) || (ref->server != NULL)) { + if (ref->authority != NULL) + res->authority = xmlMemStrdup(ref->authority); + else { + res->server = xmlMemStrdup(ref->server); + if (ref->user != NULL) + res->user = xmlMemStrdup(ref->user); + res->port = ref->port; + } + if (ref->path != NULL) + res->path = xmlMemStrdup(ref->path); + goto step_7; + } + if (bas->authority != NULL) + res->authority = xmlMemStrdup(bas->authority); + else if (bas->server != NULL) { + res->server = xmlMemStrdup(bas->server); + if (bas->user != NULL) + res->user = xmlMemStrdup(bas->user); + res->port = bas->port; + } + + /* + * 5) If the path component begins with a slash character ("/"), then + * the reference is an absolute-path and we skip to step 7. + */ + if ((ref->path != NULL) && (ref->path[0] == '/')) { + res->path = xmlMemStrdup(ref->path); + goto step_7; + } + + + /* + * 6) If this step is reached, then we are resolving a relative-path + * reference. The relative path needs to be merged with the base + * URI's path. Although there are many ways to do this, we will + * describe a simple method using a separate string buffer. + * + * Allocate a buffer large enough for the result string. + */ + len = 2; /* extra / and 0 */ + if (ref->path != NULL) + len += strlen(ref->path); + if (bas->path != NULL) + len += strlen(bas->path); + res->path = (char *) xmlMalloc(len); + if (res->path == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBuildURI: out of memory\n"); + goto done; + } + res->path[0] = 0; + + /* + * a) All but the last segment of the base URI's path component is + * copied to the buffer. In other words, any characters after the + * last (right-most) slash character, if any, are excluded. 
+ */ + cur = 0; + out = 0; + if (bas->path != NULL) { + while (bas->path[cur] != 0) { + while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) + cur++; + if (bas->path[cur] == 0) + break; + + cur++; + while (out < cur) { + res->path[out] = bas->path[out]; + out++; + } + } + } + res->path[out] = 0; + + /* + * b) The reference's path component is appended to the buffer + * string. + */ + if (ref->path != NULL && ref->path[0] != 0) { + indx = 0; + /* + * Ensure the path includes a '/' + */ + if ((out == 0) && (bas->server != NULL)) + res->path[out++] = '/'; + while (ref->path[indx] != 0) { + res->path[out++] = ref->path[indx++]; + } + } + res->path[out] = 0; + + /* + * Steps c) to h) are really path normalization steps + */ + xmlNormalizeURIPath(res->path); + +step_7: + + /* + * 7) The resulting URI components, including any inherited from the + * base URI, are recombined to give the absolute form of the URI + * reference. + */ + val = xmlSaveUri(res); + +done: + if (ref != NULL) + xmlFreeURI(ref); + if (bas != NULL) + xmlFreeURI(bas); + if (res != NULL) + xmlFreeURI(res); + return(val); +} + + diff --git a/bundle/libxml/valid.c b/bundle/libxml/valid.c new file mode 100644 index 0000000000..421a29aae5 --- /dev/null +++ b/bundle/libxml/valid.c @@ -0,0 +1,5968 @@ +/* + * valid.c : part of the code use to do the DTD handling and the validity + * checking + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/hash.h> +#include <libxml/valid.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlerror.h> +#include <libxml/list.h> +#include <libxml/globals.h> + +/* #define DEBUG_VALID_ALGO */ +/* #define DEBUG_REGEXP_ALGO */ + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + + +#ifndef LIBXML_REGEXP_ENABLED +/* + * If regexp are not enabled, it uses a home made algorithm less + * complex and easier to + * debug/maintain than a generic NFA -> DFA state based algo. The + * only restriction is on the deepness of the tree limited by the + * size of the occurs bitfield + * + * this is the content of a saved state for rollbacks + */ + +#define ROLLBACK_OR 0 +#define ROLLBACK_PARENT 1 + +typedef struct _xmlValidState { + xmlElementContentPtr cont; /* pointer to the content model subtree */ + xmlNodePtr node; /* pointer to the current node in the list */ + long occurs;/* bitfield for multiple occurrences */ + unsigned char depth; /* current depth in the overall tree */ + unsigned char state; /* ROLLBACK_XXX */ +} _xmlValidState; + +#define MAX_RECURSE 25000 +#define MAX_DEPTH ((sizeof(_xmlValidState.occurs)) * 8) +#define CONT ctxt->vstate->cont +#define NODE ctxt->vstate->node +#define DEPTH ctxt->vstate->depth +#define OCCURS ctxt->vstate->occurs +#define STATE ctxt->vstate->state + +#define OCCURRENCE (ctxt->vstate->occurs & (1 << DEPTH)) +#define PARENT_OCCURRENCE (ctxt->vstate->occurs & ((1 << DEPTH) - 1)) + +#define SET_OCCURRENCE ctxt->vstate->occurs |= (1 << DEPTH) +#define RESET_OCCURRENCE ctxt->vstate->occurs &= ((1 << DEPTH) - 1) + +static int +vstateVPush(xmlValidCtxtPtr ctxt, xmlElementContentPtr cont, + xmlNodePtr node, unsigned char depth, long occurs, + 
unsigned char state) { + int i = ctxt->vstateNr - 1; + + if (ctxt->vstateNr > MAX_RECURSE) { + return(-1); + } + if (ctxt->vstateNr >= ctxt->vstateMax) { + ctxt->vstateMax *= 2; + ctxt->vstateTab = (xmlValidState *) xmlRealloc(ctxt->vstateTab, + ctxt->vstateMax * sizeof(ctxt->vstateTab[0])); + if (ctxt->vstateTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !n"); + return(-1); + } + ctxt->vstate = &ctxt->vstateTab[0]; + } + /* + * Don't push on the stack a state already here + */ + if ((i >= 0) && (ctxt->vstateTab[i].cont == cont) && + (ctxt->vstateTab[i].node == node) && + (ctxt->vstateTab[i].depth == depth) && + (ctxt->vstateTab[i].occurs == occurs) && + (ctxt->vstateTab[i].state == state)) + return(ctxt->vstateNr); + ctxt->vstateTab[ctxt->vstateNr].cont = cont; + ctxt->vstateTab[ctxt->vstateNr].node = node; + ctxt->vstateTab[ctxt->vstateNr].depth = depth; + ctxt->vstateTab[ctxt->vstateNr].occurs = occurs; + ctxt->vstateTab[ctxt->vstateNr].state = state; + return(ctxt->vstateNr++); +} + +static int +vstateVPop(xmlValidCtxtPtr ctxt) { + if (ctxt->vstateNr <= 1) return(-1); + ctxt->vstateNr--; + ctxt->vstate = &ctxt->vstateTab[0]; + ctxt->vstate->cont = ctxt->vstateTab[ctxt->vstateNr].cont; + ctxt->vstate->node = ctxt->vstateTab[ctxt->vstateNr].node; + ctxt->vstate->depth = ctxt->vstateTab[ctxt->vstateNr].depth; + ctxt->vstate->occurs = ctxt->vstateTab[ctxt->vstateNr].occurs; + ctxt->vstate->state = ctxt->vstateTab[ctxt->vstateNr].state; + return(ctxt->vstateNr); +} + +#endif /* LIBXML_REGEXP_ENABLED */ + +static int +nodeVPush(xmlValidCtxtPtr ctxt, xmlNodePtr value) +{ + if (ctxt->nodeMax <= 0) { + ctxt->nodeMax = 4; + ctxt->nodeTab = + (xmlNodePtr *) xmlMalloc(ctxt->nodeMax * + sizeof(ctxt->nodeTab[0])); + if (ctxt->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc failed !\n"); + ctxt->nodeMax = 0; + return (0); + } + } + if (ctxt->nodeNr >= ctxt->nodeMax) { + ctxt->nodeMax *= 2; + ctxt->nodeTab = + (xmlNodePtr *) 
xmlRealloc(ctxt->nodeTab, + ctxt->nodeMax * + sizeof(ctxt->nodeTab[0])); + if (ctxt->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); + return (0); + } + } + ctxt->nodeTab[ctxt->nodeNr] = value; + ctxt->node = value; + return (ctxt->nodeNr++); +} +static xmlNodePtr +nodeVPop(xmlValidCtxtPtr ctxt) +{ + xmlNodePtr ret; + + if (ctxt->nodeNr <= 0) + return (0); + ctxt->nodeNr--; + if (ctxt->nodeNr > 0) + ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; + else + ctxt->node = NULL; + ret = ctxt->nodeTab[ctxt->nodeNr]; + ctxt->nodeTab[ctxt->nodeNr] = 0; + return (ret); +} + +#ifdef DEBUG_VALID_ALGO +static void +xmlValidPrintNode(xmlNodePtr cur) { + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, "null"); + return; + } + switch (cur->type) { + case XML_ELEMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "%s ", cur->name); + break; + case XML_TEXT_NODE: + xmlGenericError(xmlGenericErrorContext, "text "); + break; + case XML_CDATA_SECTION_NODE: + xmlGenericError(xmlGenericErrorContext, "cdata "); + break; + case XML_ENTITY_REF_NODE: + xmlGenericError(xmlGenericErrorContext, "&%s; ", cur->name); + break; + case XML_PI_NODE: + xmlGenericError(xmlGenericErrorContext, "pi(%s) ", cur->name); + break; + case XML_COMMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "comment "); + break; + case XML_ATTRIBUTE_NODE: + xmlGenericError(xmlGenericErrorContext, "?attr? "); + break; + case XML_ENTITY_NODE: + xmlGenericError(xmlGenericErrorContext, "?ent? "); + break; + case XML_DOCUMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "?doc? "); + break; + case XML_DOCUMENT_TYPE_NODE: + xmlGenericError(xmlGenericErrorContext, "?doctype? "); + break; + case XML_DOCUMENT_FRAG_NODE: + xmlGenericError(xmlGenericErrorContext, "?frag? "); + break; + case XML_NOTATION_NODE: + xmlGenericError(xmlGenericErrorContext, "?nota? "); + break; + case XML_HTML_DOCUMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "?html? 
"); + break; +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "?docb? "); + break; +#endif + case XML_DTD_NODE: + xmlGenericError(xmlGenericErrorContext, "?dtd? "); + break; + case XML_ELEMENT_DECL: + xmlGenericError(xmlGenericErrorContext, "?edecl? "); + break; + case XML_ATTRIBUTE_DECL: + xmlGenericError(xmlGenericErrorContext, "?adecl? "); + break; + case XML_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, "?entdecl? "); + break; + case XML_NAMESPACE_DECL: + xmlGenericError(xmlGenericErrorContext, "?nsdecl? "); + break; + case XML_XINCLUDE_START: + xmlGenericError(xmlGenericErrorContext, "incstart "); + break; + case XML_XINCLUDE_END: + xmlGenericError(xmlGenericErrorContext, "incend "); + break; + } +} + +static void +xmlValidPrintNodeList(xmlNodePtr cur) { + if (cur == NULL) + xmlGenericError(xmlGenericErrorContext, "null "); + while (cur != NULL) { + xmlValidPrintNode(cur); + cur = cur->next; + } +} + +static void +xmlValidDebug(xmlNodePtr cur, xmlElementContentPtr cont) { + char expr[1000]; + + expr[0] = 0; + xmlGenericError(xmlGenericErrorContext, "valid: "); + xmlValidPrintNodeList(cur); + xmlGenericError(xmlGenericErrorContext, "against "); + xmlSnprintfElementContent(expr, 5000, cont, 1); + xmlGenericError(xmlGenericErrorContext, "%s\n", expr); +} + +static void +xmlValidDebugState(xmlValidStatePtr state) { + xmlGenericError(xmlGenericErrorContext, "("); + if (state->cont == NULL) + xmlGenericError(xmlGenericErrorContext, "null,"); + else + switch (state->cont->type) { + case XML_ELEMENT_CONTENT_PCDATA: + xmlGenericError(xmlGenericErrorContext, "pcdata,"); + break; + case XML_ELEMENT_CONTENT_ELEMENT: + xmlGenericError(xmlGenericErrorContext, "%s,", + state->cont->name); + break; + case XML_ELEMENT_CONTENT_SEQ: + xmlGenericError(xmlGenericErrorContext, "seq,"); + break; + case XML_ELEMENT_CONTENT_OR: + xmlGenericError(xmlGenericErrorContext, "or,"); + break; + } + xmlValidPrintNode(state->node); + 
xmlGenericError(xmlGenericErrorContext, ",%d,%X,%d)", + state->depth, state->occurs, state->state); +} + +static void +xmlValidStateDebug(xmlValidCtxtPtr ctxt) { + int i, j; + + xmlGenericError(xmlGenericErrorContext, "state: "); + xmlValidDebugState(ctxt->vstate); + xmlGenericError(xmlGenericErrorContext, " stack: %d ", + ctxt->vstateNr - 1); + for (i = 0, j = ctxt->vstateNr - 1;(i < 3) && (j > 0);i++,j--) + xmlValidDebugState(&ctxt->vstateTab[j]); + xmlGenericError(xmlGenericErrorContext, "\n"); +} + +/***** +#define DEBUG_VALID_STATE(n,c) xmlValidDebug(n,c); + *****/ + +#define DEBUG_VALID_STATE(n,c) xmlValidStateDebug(ctxt); +#define DEBUG_VALID_MSG(m) \ + xmlGenericError(xmlGenericErrorContext, "%s\n", m); + +#else +#define DEBUG_VALID_STATE(n,c) +#define DEBUG_VALID_MSG(m) +#endif + +/* TODO: use hash table for accesses to elem and attribute definitions */ + +#define VECTXT(ctxt, node) \ + if ((ctxt != NULL) && (ctxt->error != NULL) && \ + (node != NULL)) { \ + xmlChar *base = xmlNodeGetBase(NULL,node); \ + if (base != NULL) { \ + ctxt->error(ctxt->userData, "%s:%d: ", base, \ + (int) (long) node->content); \ + xmlFree(base); \ + } else \ + ctxt->error(ctxt->userData, ":%d: ", \ + (int) (long) node->content); \ + } + +#define VWCTXT(ctxt, node) \ + if ((ctxt != NULL) && (ctxt->warning != NULL) && \ + (node != NULL)) { \ + xmlChar *base = xmlNodeGetBase(NULL,node); \ + if (base != NULL) { \ + ctxt->warning(ctxt->userData, "%s:%d: ", base, \ + (int) (long) node->content); \ + xmlFree(base); \ + } else \ + ctxt->warning(ctxt->userData, ":%d: ", \ + (int) (long) node->content); \ + } + +#define VERROR \ + if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error + +#define VWARNING \ + if ((ctxt != NULL) && (ctxt->warning != NULL)) ctxt->warning + +#define CHECK_DTD \ + if (doc == NULL) return(0); \ + else if ((doc->intSubset == NULL) && \ + (doc->extSubset == NULL)) return(0) + +static xmlElementPtr xmlGetDtdElementDesc2(xmlDtdPtr dtd, const xmlChar *name, + int 
create); +xmlAttributePtr xmlScanAttributeDecl(xmlDtdPtr dtd, const xmlChar *elem); + +#ifdef LIBXML_REGEXP_ENABLED + +/************************************************************************ + * * + * Content model validation based on the regexps * + * * + ************************************************************************/ + +/** + * xmlValidBuildAContentModel: + * @content: the content model + * @ctxt: the schema parser context + * @name: the element name whose content is being built + * + * Generate the automata sequence needed for that type + * + * Returns 1 if successful or 0 in case of error. + */ +static int +xmlValidBuildAContentModel(xmlElementContentPtr content, + xmlValidCtxtPtr ctxt, + const xmlChar *name) { + if (content == NULL) { + VERROR(ctxt->userData, + "Found unexpected type = NULL in %s content model\n", name); + return(0); + } + switch (content->type) { + case XML_ELEMENT_CONTENT_PCDATA: + VERROR(ctxt->userData, "ContentModel found PCDATA for element %s\n", + name); + return(0); + break; + case XML_ELEMENT_CONTENT_ELEMENT: { + xmlAutomataStatePtr oldstate = ctxt->state; + xmlChar *QName = NULL; + const xmlChar *fname = content->name; + + if (content->prefix != NULL) { + int len; + + len = xmlStrlen(content->name) + + xmlStrlen(content->prefix) + 2; + QName = xmlMalloc(len); + if (QName == NULL) { + VERROR(ctxt->userData, + "ContentModel %s : alloc failed\n", name); + return(0); + } + snprintf((char *) QName, len, "%s:%s", + (char *)content->prefix, + (char *)content->name); + fname = QName; + } + + switch (content->ocur) { + case XML_ELEMENT_CONTENT_ONCE: + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, fname, NULL); + break; + case XML_ELEMENT_CONTENT_OPT: + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, fname, NULL); + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + break; + case XML_ELEMENT_CONTENT_PLUS: + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, 
fname, NULL); + xmlAutomataNewTransition(ctxt->am, ctxt->state, + ctxt->state, fname, NULL); + break; + case XML_ELEMENT_CONTENT_MULT: + xmlAutomataNewTransition(ctxt->am, ctxt->state, + ctxt->state, fname, NULL); + break; + } + if (QName != NULL) + xmlFree(QName); + break; + } + case XML_ELEMENT_CONTENT_SEQ: { + xmlAutomataStatePtr oldstate, oldend; + xmlElementContentOccur ocur; + + /* + * Simply iterate over the content + */ + oldstate = ctxt->state; + ocur = content->ocur; + do { + xmlValidBuildAContentModel(content->c1, ctxt, name); + content = content->c2; + } while ((content->type == XML_ELEMENT_CONTENT_SEQ) && + (content->ocur == XML_ELEMENT_CONTENT_ONCE)); + xmlValidBuildAContentModel(content, ctxt, name); + oldend = ctxt->state; + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, oldend, NULL); + switch (ocur) { + case XML_ELEMENT_CONTENT_ONCE: + break; + case XML_ELEMENT_CONTENT_OPT: + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + break; + case XML_ELEMENT_CONTENT_MULT: + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + xmlAutomataNewEpsilon(ctxt->am, oldend, oldstate); + break; + case XML_ELEMENT_CONTENT_PLUS: + xmlAutomataNewEpsilon(ctxt->am, oldend, oldstate); + break; + } + break; + } + case XML_ELEMENT_CONTENT_OR: { + xmlAutomataStatePtr oldstate, oldend; + xmlElementContentOccur ocur; + + ocur = content->ocur; + if ((ocur == XML_ELEMENT_CONTENT_PLUS) || + (ocur == XML_ELEMENT_CONTENT_MULT)) { + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, + ctxt->state, NULL); + } + oldstate = ctxt->state; + oldend = xmlAutomataNewState(ctxt->am); + + /* + * iterate over the subtypes and remerge the end with an + * epsilon transition + */ + do { + ctxt->state = oldstate; + xmlValidBuildAContentModel(content->c1, ctxt, name); + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldend); + content = content->c2; + } while ((content->type == XML_ELEMENT_CONTENT_OR) && + (content->ocur == XML_ELEMENT_CONTENT_ONCE)); + ctxt->state = oldstate; + 
xmlValidBuildAContentModel(content, ctxt, name); + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldend); + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, oldend, NULL); + switch (ocur) { + case XML_ELEMENT_CONTENT_ONCE: + break; + case XML_ELEMENT_CONTENT_OPT: + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + break; + case XML_ELEMENT_CONTENT_MULT: + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + xmlAutomataNewEpsilon(ctxt->am, oldend, oldstate); + break; + case XML_ELEMENT_CONTENT_PLUS: + xmlAutomataNewEpsilon(ctxt->am, oldend, oldstate); + break; + } + break; + } + default: + VERROR(ctxt->userData, "ContentModel broken for element %s\n", + name); + return(0); + } + return(1); +} +/** + * xmlValidBuildContentModel: + * @ctxt: a validation context + * @elem: an element declaration node + * + * (Re)Build the automata associated to the content model of this + * element + * + * Returns 1 in case of success, 0 in case of error + */ +int +xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, xmlElementPtr elem) { + xmlAutomataStatePtr start; + + if ((ctxt == NULL) || (elem == NULL)) + return(0); + if (elem->type != XML_ELEMENT_DECL) + return(0); + if (elem->etype != XML_ELEMENT_TYPE_ELEMENT) + return(1); + /* TODO: should we rebuild in this case ? 
*/ + if (elem->contModel != NULL) + return(1); + + ctxt->am = xmlNewAutomata(); + if (ctxt->am == NULL) { + VERROR(ctxt->userData, "Cannot create automata for element %s\n", + elem->name); + return(0); + } + start = ctxt->state = xmlAutomataGetInitState(ctxt->am); + xmlValidBuildAContentModel(elem->content, ctxt, elem->name); + xmlAutomataSetFinalState(ctxt->am, ctxt->state); + elem->contModel = xmlAutomataCompile(ctxt->am); + if (!xmlRegexpIsDeterminist(elem->contModel)) { + char expr[5000]; + expr[0] = 0; + xmlSnprintfElementContent(expr, 5000, elem->content, 1); + VERROR(ctxt->userData, "Content model of %s is not determinist: %s\n", + elem->name, expr); +#ifdef DEBUG_REGEXP_ALGO + xmlRegexpPrint(stderr, elem->contModel); +#endif + ctxt->valid = 0; + } + ctxt->state = NULL; + xmlFreeAutomata(ctxt->am); + ctxt->am = NULL; + return(1); +} + +#endif /* LIBXML_REGEXP_ENABLED */ + +/************************************************************************ + * * + * QName handling helper * + * * + ************************************************************************/ + +/** + * xmlSplitQName2: + * @name: an XML parser context + * @prefix: a xmlChar ** + * + * parse an XML qualified name string + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns NULL if not a QName, otherwise the local part, and prefix + * is updated to get the Prefix if any. 
+ */ + +xmlChar * +xmlSplitQName2(const xmlChar *name, xmlChar **prefix) { + int len = 0; + xmlChar *ret = NULL; + + *prefix = NULL; + +#ifndef XML_XML_NAMESPACE + /* xml: prefix is not really a namespace */ + if ((name[0] == 'x') && (name[1] == 'm') && + (name[2] == 'l') && (name[3] == ':')) + return(NULL); +#endif + + /* nasty but valid */ + if (name[0] == ':') + return(NULL); + + /* + * we are not trying to validate but just to cut, and yes it will + * work even if this is as set of UTF-8 encoded chars + */ + while ((name[len] != 0) && (name[len] != ':')) + len++; + + if (name[len] == 0) + return(NULL); + + *prefix = xmlStrndup(name, len); + ret = xmlStrdup(&name[len + 1]); + + return(ret); +} + +/**************************************************************** + * * + * Util functions for data allocation/deallocation * + * * + ****************************************************************/ + +/** + * xmlNewElementContent: + * @name: the subelement name or NULL + * @type: the type of element content decl + * + * Allocate an element content structure. 
+ * + * Returns NULL if not, otherwise the new element content structure + */ +xmlElementContentPtr +xmlNewElementContent(xmlChar *name, xmlElementContentType type) { + xmlElementContentPtr ret; + + switch(type) { + case XML_ELEMENT_CONTENT_ELEMENT: + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent : name == NULL !\n"); + } + break; + case XML_ELEMENT_CONTENT_PCDATA: + case XML_ELEMENT_CONTENT_SEQ: + case XML_ELEMENT_CONTENT_OR: + if (name != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent : name != NULL !\n"); + } + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent: unknown type %d\n", type); + return(NULL); + } + ret = (xmlElementContentPtr) xmlMalloc(sizeof(xmlElementContent)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent : out of memory!\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlElementContent)); + ret->type = type; + ret->ocur = XML_ELEMENT_CONTENT_ONCE; + if (name != NULL) { + xmlChar *prefix = NULL; + ret->name = xmlSplitQName2(name, &prefix); + if (ret->name == NULL) + ret->name = xmlStrdup(name); + ret->prefix = prefix; + } else { + ret->name = NULL; + ret->prefix = NULL; + } + ret->c1 = ret->c2 = ret->parent = NULL; + return(ret); +} + +/** + * xmlCopyElementContent: + * @cur: An element content pointer. + * + * Build a copy of an element content description. + * + * Returns the new xmlElementContentPtr or NULL in case of error. 
+ */ +xmlElementContentPtr +xmlCopyElementContent(xmlElementContentPtr cur) { + xmlElementContentPtr ret; + + if (cur == NULL) return(NULL); + ret = xmlNewElementContent((xmlChar *) cur->name, cur->type); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyElementContent : out of memory\n"); + return(NULL); + } + if (cur->prefix != NULL) + ret->prefix = xmlStrdup(cur->prefix); + ret->ocur = cur->ocur; + if (cur->c1 != NULL) ret->c1 = xmlCopyElementContent(cur->c1); + if (ret->c1 != NULL) + ret->c1->parent = ret; + if (cur->c2 != NULL) ret->c2 = xmlCopyElementContent(cur->c2); + if (ret->c2 != NULL) + ret->c2->parent = ret; + return(ret); +} + +/** + * xmlFreeElementContent: + * @cur: the element content tree to free + * + * Free an element content structure. This is a recursive call ! + */ +void +xmlFreeElementContent(xmlElementContentPtr cur) { + if (cur == NULL) return; + switch (cur->type) { + case XML_ELEMENT_CONTENT_PCDATA: + case XML_ELEMENT_CONTENT_ELEMENT: + case XML_ELEMENT_CONTENT_SEQ: + case XML_ELEMENT_CONTENT_OR: + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlFreeElementContent : type %d\n", cur->type); + return; + } + if (cur->c1 != NULL) xmlFreeElementContent(cur->c1); + if (cur->c2 != NULL) xmlFreeElementContent(cur->c2); + if (cur->name != NULL) xmlFree((xmlChar *) cur->name); + if (cur->prefix != NULL) xmlFree((xmlChar *) cur->prefix); + xmlFree(cur); +} + +/** + * xmlDumpElementContent: + * @buf: An XML buffer + * @content: An element table + * @glob: 1 if one must print the englobing parenthesis, 0 otherwise + * + * This will dump the content of the element table as an XML DTD definition + */ +static void +xmlDumpElementContent(xmlBufferPtr buf, xmlElementContentPtr content, int glob) { + if (content == NULL) return; + + if (glob) xmlBufferWriteChar(buf, "("); + switch (content->type) { + case XML_ELEMENT_CONTENT_PCDATA: + xmlBufferWriteChar(buf, "#PCDATA"); + break; + case XML_ELEMENT_CONTENT_ELEMENT: + 
if (content->prefix != NULL) { + xmlBufferWriteCHAR(buf, content->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, content->name); + break; + case XML_ELEMENT_CONTENT_SEQ: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlDumpElementContent(buf, content->c1, 1); + else + xmlDumpElementContent(buf, content->c1, 0); + xmlBufferWriteChar(buf, " , "); + if (content->c2->type == XML_ELEMENT_CONTENT_OR) + xmlDumpElementContent(buf, content->c2, 1); + else + xmlDumpElementContent(buf, content->c2, 0); + break; + case XML_ELEMENT_CONTENT_OR: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlDumpElementContent(buf, content->c1, 1); + else + xmlDumpElementContent(buf, content->c1, 0); + xmlBufferWriteChar(buf, " | "); + if (content->c2->type == XML_ELEMENT_CONTENT_SEQ) + xmlDumpElementContent(buf, content->c2, 1); + else + xmlDumpElementContent(buf, content->c2, 0); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpElementContent: unknown type %d\n", + content->type); + } + if (glob) + xmlBufferWriteChar(buf, ")"); + switch (content->ocur) { + case XML_ELEMENT_CONTENT_ONCE: + break; + case XML_ELEMENT_CONTENT_OPT: + xmlBufferWriteChar(buf, "?"); + break; + case XML_ELEMENT_CONTENT_MULT: + xmlBufferWriteChar(buf, "*"); + break; + case XML_ELEMENT_CONTENT_PLUS: + xmlBufferWriteChar(buf, "+"); + break; + } +} + +/** + * xmlSprintfElementContent: + * @buf: an output buffer + * @content: An element table + * @glob: 1 if one must print the englobing parenthesis, 0 otherwise + * + * Deprecated, unsafe, use xmlSnprintfElementContent + */ +void +xmlSprintfElementContent(char *buf ATTRIBUTE_UNUSED, + xmlElementContentPtr content ATTRIBUTE_UNUSED, + int glob ATTRIBUTE_UNUSED) { +} + +/** + * xmlSnprintfElementContent: + * @buf: an output buffer + * @size: the buffer size + * @content: An element table + * @glob: 1 if one 
must print the englobing parenthesis, 0 otherwise + * + * This will dump the content of the element content definition + * Intended just for the debug routine + */ +void +xmlSnprintfElementContent(char *buf, int size, xmlElementContentPtr content, int glob) { + int len; + + if (content == NULL) return; + len = strlen(buf); + if (size - len < 50) { + if ((size - len > 4) && (buf[len - 1] != '.')) + strcat(buf, " ..."); + return; + } + if (glob) strcat(buf, "("); + switch (content->type) { + case XML_ELEMENT_CONTENT_PCDATA: + strcat(buf, "#PCDATA"); + break; + case XML_ELEMENT_CONTENT_ELEMENT: + if (content->prefix != NULL) { + if (size - len < xmlStrlen(content->prefix) + 10) { + strcat(buf, " ..."); + return; + } + strcat(buf, (char *) content->prefix); + strcat(buf, ":"); + } + if (size - len < xmlStrlen(content->name) + 10) { + strcat(buf, " ..."); + return; + } + strcat(buf, (char *) content->name); + break; + case XML_ELEMENT_CONTENT_SEQ: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlSnprintfElementContent(buf, size, content->c1, 1); + else + xmlSnprintfElementContent(buf, size, content->c1, 0); + len = strlen(buf); + if (size - len < 50) { + if ((size - len > 4) && (buf[len - 1] != '.')) + strcat(buf, " ..."); + return; + } + strcat(buf, " , "); + if (((content->c2->type == XML_ELEMENT_CONTENT_OR) || + (content->c2->ocur != XML_ELEMENT_CONTENT_ONCE)) && + (content->c2->type != XML_ELEMENT_CONTENT_ELEMENT)) + xmlSnprintfElementContent(buf, size, content->c2, 1); + else + xmlSnprintfElementContent(buf, size, content->c2, 0); + break; + case XML_ELEMENT_CONTENT_OR: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlSnprintfElementContent(buf, size, content->c1, 1); + else + xmlSnprintfElementContent(buf, size, content->c1, 0); + len = strlen(buf); + if (size - len < 50) { + if ((size - len > 4) && (buf[len - 1] != '.')) + strcat(buf, " ..."); 
+ return; + } + strcat(buf, " | "); + if (((content->c2->type == XML_ELEMENT_CONTENT_SEQ) || + (content->c2->ocur != XML_ELEMENT_CONTENT_ONCE)) && + (content->c2->type != XML_ELEMENT_CONTENT_ELEMENT)) + xmlSnprintfElementContent(buf, size, content->c2, 1); + else + xmlSnprintfElementContent(buf, size, content->c2, 0); + break; + } + if (glob) + strcat(buf, ")"); + switch (content->ocur) { + case XML_ELEMENT_CONTENT_ONCE: + break; + case XML_ELEMENT_CONTENT_OPT: + strcat(buf, "?"); + break; + case XML_ELEMENT_CONTENT_MULT: + strcat(buf, "*"); + break; + case XML_ELEMENT_CONTENT_PLUS: + strcat(buf, "+"); + break; + } +} + +/**************************************************************** + * * + * Registration of DTD declarations * + * * + ****************************************************************/ + +/** + * xmlCreateElementTable: + * + * create and initialize an empty element hash table. + * + * Returns the xmlElementTablePtr just created or NULL in case of error. + */ +static xmlElementTablePtr +xmlCreateElementTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeElement: + * @elem: An element + * + * Deallocate the memory used by an element definition + */ +static void +xmlFreeElement(xmlElementPtr elem) { + if (elem == NULL) return; + xmlUnlinkNode((xmlNodePtr) elem); + xmlFreeElementContent(elem->content); + if (elem->name != NULL) + xmlFree((xmlChar *) elem->name); + if (elem->prefix != NULL) + xmlFree((xmlChar *) elem->prefix); +#ifdef LIBXML_REGEXP_ENABLED + if (elem->contModel != NULL) + xmlRegFreeRegexp(elem->contModel); +#endif + xmlFree(elem); +} + + +/** + * xmlAddElementDecl: + * @ctxt: the validation context + * @dtd: pointer to the DTD + * @name: the entity name + * @type: the element type + * @content: the element content tree or NULL + * + * Register a new element declaration + * + * Returns NULL if not, otherwise the entity + */ +xmlElementPtr +xmlAddElementDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *name, + 
xmlElementTypeVal type, + xmlElementContentPtr content) { + xmlElementPtr ret; + xmlElementTablePtr table; + xmlAttributePtr oldAttributes = NULL; + xmlChar *ns, *uqname; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: dtd == NULL\n"); + return(NULL); + } + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: name == NULL\n"); + return(NULL); + } + switch (type) { + case XML_ELEMENT_TYPE_EMPTY: + if (content != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content != NULL for EMPTY\n"); + return(NULL); + } + break; + case XML_ELEMENT_TYPE_ANY: + if (content != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content != NULL for ANY\n"); + return(NULL); + } + break; + case XML_ELEMENT_TYPE_MIXED: + if (content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content == NULL for MIXED\n"); + return(NULL); + } + break; + case XML_ELEMENT_TYPE_ELEMENT: + if (content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content == NULL for ELEMENT\n"); + return(NULL); + } + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: unknown type %d\n", type); + return(NULL); + } + + /* + * check if name is a QName + */ + uqname = xmlSplitQName2(name, &ns); + if (uqname != NULL) + name = uqname; + + /* + * Create the Element table if needed. + */ + table = (xmlElementTablePtr) dtd->elements; + if (table == NULL) { + table = xmlCreateElementTable(); + dtd->elements = (void *) table; + } + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: Table creation failed!\n"); + return(NULL); + } + + /* + * lookup old attributes inserted on an undefined element in the + * internal subset. 
+ */ + if ((dtd->doc != NULL) && (dtd->doc->intSubset != NULL)) { + ret = xmlHashLookup2(dtd->doc->intSubset->elements, name, ns); + if ((ret != NULL) && (ret->etype == XML_ELEMENT_TYPE_UNDEFINED)) { + oldAttributes = ret->attributes; + ret->attributes = NULL; + xmlHashRemoveEntry2(dtd->doc->intSubset->elements, name, ns, NULL); + xmlFreeElement(ret); + } + } + + /* + * The element may already be present if one of its attribute + * was registered first + */ + ret = xmlHashLookup2(table, name, ns); + if (ret != NULL) { + if (ret->etype != XML_ELEMENT_TYPE_UNDEFINED) { + /* + * The element is already defined in this DTD. + */ + VERROR(ctxt->userData, "Redefinition of element %s\n", name); + if (uqname != NULL) + xmlFree(uqname); + return(NULL); + } + } else { + ret = (xmlElementPtr) xmlMalloc(sizeof(xmlElement)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlElement)); + ret->type = XML_ELEMENT_DECL; + + /* + * fill the structure. + */ + ret->name = xmlStrdup(name); + ret->prefix = ns; + + /* + * Validity Check: + * Insertion must not fail + */ + if (xmlHashAddEntry2(table, name, ns, ret)) { + /* + * The element is already defined in this DTD. + */ + VERROR(ctxt->userData, "Redefinition of element %s\n", name); + xmlFreeElement(ret); + if (uqname != NULL) + xmlFree(uqname); + return(NULL); + } + } + + /* + * Finish to fill the structure. 
+ */ + ret->etype = type; + ret->content = xmlCopyElementContent(content); + ret->attributes = oldAttributes; + + /* + * Link it to the DTD + */ + ret->parent = dtd; + ret->doc = dtd->doc; + if (dtd->last == NULL) { + dtd->children = dtd->last = (xmlNodePtr) ret; + } else { + dtd->last->next = (xmlNodePtr) ret; + ret->prev = dtd->last; + dtd->last = (xmlNodePtr) ret; + } + if (uqname != NULL) + xmlFree(uqname); + return(ret); +} + +/** + * xmlFreeElementTable: + * @table: An element table + * + * Deallocate the memory used by an element hash table. + */ +void +xmlFreeElementTable(xmlElementTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeElement); +} + +/** + * xmlCopyElement: + * @elem: An element + * + * Build a copy of an element. + * + * Returns the new xmlElementPtr or NULL in case of error. + */ +static xmlElementPtr +xmlCopyElement(xmlElementPtr elem) { + xmlElementPtr cur; + + cur = (xmlElementPtr) xmlMalloc(sizeof(xmlElement)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyElement: out of memory !\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlElement)); + cur->type = XML_ELEMENT_DECL; + cur->etype = elem->etype; + if (elem->name != NULL) + cur->name = xmlStrdup(elem->name); + else + cur->name = NULL; + if (elem->prefix != NULL) + cur->prefix = xmlStrdup(elem->prefix); + else + cur->prefix = NULL; + cur->content = xmlCopyElementContent(elem->content); + /* TODO : rebuild the attribute list on the copy */ + cur->attributes = NULL; + return(cur); +} + +/** + * xmlCopyElementTable: + * @table: An element table + * + * Build a copy of an element table. + * + * Returns the new xmlElementTablePtr or NULL in case of error. 
+ */ +xmlElementTablePtr +xmlCopyElementTable(xmlElementTablePtr table) { + return((xmlElementTablePtr) xmlHashCopy(table, + (xmlHashCopier) xmlCopyElement)); +} + +/** + * xmlDumpElementDecl: + * @buf: the XML buffer output + * @elem: An element table + * + * This will dump the content of the element declaration as an XML + * DTD definition + */ +void +xmlDumpElementDecl(xmlBufferPtr buf, xmlElementPtr elem) { + switch (elem->etype) { + case XML_ELEMENT_TYPE_EMPTY: + xmlBufferWriteChar(buf, "<!ELEMENT "); + if (elem->prefix != NULL) { + xmlBufferWriteCHAR(buf, elem->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, elem->name); + xmlBufferWriteChar(buf, " EMPTY>\n"); + break; + case XML_ELEMENT_TYPE_ANY: + xmlBufferWriteChar(buf, "<!ELEMENT "); + if (elem->prefix != NULL) { + xmlBufferWriteCHAR(buf, elem->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, elem->name); + xmlBufferWriteChar(buf, " ANY>\n"); + break; + case XML_ELEMENT_TYPE_MIXED: + xmlBufferWriteChar(buf, "<!ELEMENT "); + if (elem->prefix != NULL) { + xmlBufferWriteCHAR(buf, elem->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, elem->name); + xmlBufferWriteChar(buf, " "); + xmlDumpElementContent(buf, elem->content, 1); + xmlBufferWriteChar(buf, ">\n"); + break; + case XML_ELEMENT_TYPE_ELEMENT: + xmlBufferWriteChar(buf, "<!ELEMENT "); + if (elem->prefix != NULL) { + xmlBufferWriteCHAR(buf, elem->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, elem->name); + xmlBufferWriteChar(buf, " "); + xmlDumpElementContent(buf, elem->content, 1); + xmlBufferWriteChar(buf, ">\n"); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpElementDecl: internal: unknown type %d\n", + elem->etype); + } +} + +/** + * xmlDumpElementTable: + * @buf: the XML buffer output + * @table: An element table + * + * This will dump the content of the element table as an XML DTD definition + */ +void 
+xmlDumpElementTable(xmlBufferPtr buf, xmlElementTablePtr table) { + xmlHashScan(table, (xmlHashScanner) xmlDumpElementDecl, buf); +} + +/** + * xmlCreateEnumeration: + * @name: the enumeration name or NULL + * + * create and initialize an enumeration attribute node. + * + * Returns the xmlEnumerationPtr just created or NULL in case + * of error. + */ +xmlEnumerationPtr +xmlCreateEnumeration(xmlChar *name) { + xmlEnumerationPtr ret; + + ret = (xmlEnumerationPtr) xmlMalloc(sizeof(xmlEnumeration)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCreateEnumeration : xmlMalloc(%ld) failed\n", + (long)sizeof(xmlEnumeration)); + return(NULL); + } + memset(ret, 0, sizeof(xmlEnumeration)); + + if (name != NULL) + ret->name = xmlStrdup(name); + return(ret); +} + +/** + * xmlFreeEnumeration: + * @cur: the tree to free. + * + * free an enumeration attribute node (recursive). + */ +void +xmlFreeEnumeration(xmlEnumerationPtr cur) { + if (cur == NULL) return; + + if (cur->next != NULL) xmlFreeEnumeration(cur->next); + + if (cur->name != NULL) xmlFree((xmlChar *) cur->name); + xmlFree(cur); +} + +/** + * xmlCopyEnumeration: + * @cur: the tree to copy. + * + * Copy an enumeration attribute node (recursive). + * + * Returns the xmlEnumerationPtr just created or NULL in case + * of error. 
+ */ +xmlEnumerationPtr +xmlCopyEnumeration(xmlEnumerationPtr cur) { + xmlEnumerationPtr ret; + + if (cur == NULL) return(NULL); + ret = xmlCreateEnumeration((xmlChar *) cur->name); + + if (cur->next != NULL) ret->next = xmlCopyEnumeration(cur->next); + else ret->next = NULL; + + return(ret); +} + +/** + * xmlDumpEnumeration: + * @buf: the XML buffer output + * @enum: An enumeration + * + * This will dump the content of the enumeration + */ +static void +xmlDumpEnumeration(xmlBufferPtr buf, xmlEnumerationPtr cur) { + if (cur == NULL) return; + + xmlBufferWriteCHAR(buf, cur->name); + if (cur->next == NULL) + xmlBufferWriteChar(buf, ")"); + else { + xmlBufferWriteChar(buf, " | "); + xmlDumpEnumeration(buf, cur->next); + } +} + +/** + * xmlCreateAttributeTable: + * + * create and initialize an empty attribute hash table. + * + * Returns the xmlAttributeTablePtr just created or NULL in case + * of error. + */ +static xmlAttributeTablePtr +xmlCreateAttributeTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlScanAttributeDeclCallback: + * @attr: the attribute decl + * @list: the list to update + * + * Callback called by xmlScanAttributeDecl when a new attribute + * has to be entered in the list. + */ +static void +xmlScanAttributeDeclCallback(xmlAttributePtr attr, xmlAttributePtr *list, + const xmlChar* name ATTRIBUTE_UNUSED) { + attr->nexth = *list; + *list = attr; +} + +/** + * xmlScanAttributeDecl: + * @dtd: pointer to the DTD + * @elem: the element name + * + * When inserting a new element scan the DtD for existing attributes + * for that element and initialize the Attribute chain + * + * Returns the pointer to the first attribute decl in the chain, + * possibly NULL. 
+ */ +xmlAttributePtr +xmlScanAttributeDecl(xmlDtdPtr dtd, const xmlChar *elem) { + xmlAttributePtr ret = NULL; + xmlAttributeTablePtr table; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanAttributeDecl: dtd == NULL\n"); + return(NULL); + } + if (elem == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanAttributeDecl: elem == NULL\n"); + return(NULL); + } + table = (xmlAttributeTablePtr) dtd->attributes; + if (table == NULL) + return(NULL); + + /* WRONG !!! */ + xmlHashScan3(table, NULL, NULL, elem, + (xmlHashScanner) xmlScanAttributeDeclCallback, &ret); + return(ret); +} + +/** + * xmlScanIDAttributeDecl: + * @ctxt: the validation context + * @elem: the element name + * + * Verify that the element don't have too many ID attributes + * declared. + * + * Returns the number of ID attributes found. + */ +static int +xmlScanIDAttributeDecl(xmlValidCtxtPtr ctxt, xmlElementPtr elem) { + xmlAttributePtr cur; + int ret = 0; + + if (elem == NULL) return(0); + cur = elem->attributes; + while (cur != NULL) { + if (cur->atype == XML_ATTRIBUTE_ID) { + ret ++; + if (ret > 1) + VERROR(ctxt->userData, + "Element %s has too many ID attributes defined : %s\n", + elem->name, cur->name); + } + cur = cur->nexth; + } + return(ret); +} + +/** + * xmlFreeAttribute: + * @elem: An attribute + * + * Deallocate the memory used by an attribute definition + */ +static void +xmlFreeAttribute(xmlAttributePtr attr) { + if (attr == NULL) return; + xmlUnlinkNode((xmlNodePtr) attr); + if (attr->tree != NULL) + xmlFreeEnumeration(attr->tree); + if (attr->elem != NULL) + xmlFree((xmlChar *) attr->elem); + if (attr->name != NULL) + xmlFree((xmlChar *) attr->name); + if (attr->defaultValue != NULL) + xmlFree((xmlChar *) attr->defaultValue); + if (attr->prefix != NULL) + xmlFree((xmlChar *) attr->prefix); + xmlFree(attr); +} + + +/** + * xmlAddAttributeDecl: + * @ctxt: the validation context + * @dtd: pointer to the DTD + * @elem: the element name + * @name: the 
attribute name + * @ns: the attribute namespace prefix + * @type: the attribute type + * @def: the attribute default type + * @defaultValue: the attribute default value + * @tree: if it's an enumeration, the associated list + * + * Register a new attribute declaration + * Note that @tree becomes the ownership of the DTD + * + * Returns NULL if not new, otherwise the attribute decl + */ +xmlAttributePtr +xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem, + const xmlChar *name, const xmlChar *ns, + xmlAttributeType type, xmlAttributeDefault def, + const xmlChar *defaultValue, xmlEnumerationPtr tree) { + xmlAttributePtr ret; + xmlAttributeTablePtr table; + xmlElementPtr elemDef; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: dtd == NULL\n"); + xmlFreeEnumeration(tree); + return(NULL); + } + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: name == NULL\n"); + xmlFreeEnumeration(tree); + return(NULL); + } + if (elem == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: elem == NULL\n"); + xmlFreeEnumeration(tree); + return(NULL); + } + + /* + * Check the type and possibly the default value. 
+ */ + switch (type) { + case XML_ATTRIBUTE_CDATA: + break; + case XML_ATTRIBUTE_ID: + break; + case XML_ATTRIBUTE_IDREF: + break; + case XML_ATTRIBUTE_IDREFS: + break; + case XML_ATTRIBUTE_ENTITY: + break; + case XML_ATTRIBUTE_ENTITIES: + break; + case XML_ATTRIBUTE_NMTOKEN: + break; + case XML_ATTRIBUTE_NMTOKENS: + break; + case XML_ATTRIBUTE_ENUMERATION: + break; + case XML_ATTRIBUTE_NOTATION: + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: unknown type %d\n", type); + xmlFreeEnumeration(tree); + return(NULL); + } + if ((defaultValue != NULL) && + (!xmlValidateAttributeValue(type, defaultValue))) { + VERROR(ctxt->userData, "Attribute %s of %s: invalid default value\n", + elem, name, defaultValue); + defaultValue = NULL; + ctxt->valid = 0; + } + + /* + * Check first that an attribute defined in the external subset wasn't + * already defined in the internal subset + */ + if ((dtd->doc != NULL) && (dtd->doc->extSubset == dtd) && + (dtd->doc->intSubset != NULL) && + (dtd->doc->intSubset->attributes != NULL)) { + ret = xmlHashLookup3(dtd->doc->intSubset->attributes, name, ns, elem); + if (ret != NULL) + return(NULL); + } + + /* + * Create the Attribute table if needed. + */ + table = (xmlAttributeTablePtr) dtd->attributes; + if (table == NULL) { + table = xmlCreateAttributeTable(); + dtd->attributes = (void *) table; + } + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: Table creation failed!\n"); + return(NULL); + } + + + ret = (xmlAttributePtr) xmlMalloc(sizeof(xmlAttribute)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlAttribute)); + ret->type = XML_ATTRIBUTE_DECL; + + /* + * fill the structure. 
+ */ + ret->atype = type; + ret->name = xmlStrdup(name); + ret->prefix = xmlStrdup(ns); + ret->elem = xmlStrdup(elem); + ret->def = def; + ret->tree = tree; + if (defaultValue != NULL) + ret->defaultValue = xmlStrdup(defaultValue); + + /* + * Validity Check: + * Search the DTD for previous declarations of the ATTLIST + */ + if (xmlHashAddEntry3(table, name, ns, elem, ret) < 0) { + /* + * The attribute is already defined in this DTD. + */ + VWARNING(ctxt->userData, + "Attribute %s of element %s: already defined\n", + name, elem); + xmlFreeAttribute(ret); + return(NULL); + } + + /* + * Validity Check: + * Multiple ID per element + */ + elemDef = xmlGetDtdElementDesc2(dtd, elem, 1); + if (elemDef != NULL) { + + if ((type == XML_ATTRIBUTE_ID) && + (xmlScanIDAttributeDecl(NULL, elemDef) != 0)) { + VERROR(ctxt->userData, + "Element %s has too may ID attributes defined : %s\n", + elem, name); + ctxt->valid = 0; + } + + /* + * Insert namespace default def first they need to be + * processed first. 
+ */ + if ((xmlStrEqual(ret->name, BAD_CAST "xmlns")) || + ((ret->prefix != NULL && + (xmlStrEqual(ret->prefix, BAD_CAST "xmlns"))))) { + ret->nexth = elemDef->attributes; + elemDef->attributes = ret; + } else { + xmlAttributePtr tmp = elemDef->attributes; + + while ((tmp != NULL) && + ((xmlStrEqual(tmp->name, BAD_CAST "xmlns")) || + ((ret->prefix != NULL && + (xmlStrEqual(ret->prefix, BAD_CAST "xmlns")))))) { + if (tmp->nexth == NULL) + break; + tmp = tmp->nexth; + } + if (tmp != NULL) { + ret->nexth = tmp->nexth; + tmp->nexth = ret; + } else { + ret->nexth = elemDef->attributes; + elemDef->attributes = ret; + } + } + } + + /* + * Link it to the DTD + */ + ret->parent = dtd; + ret->doc = dtd->doc; + if (dtd->last == NULL) { + dtd->children = dtd->last = (xmlNodePtr) ret; + } else { + dtd->last->next = (xmlNodePtr) ret; + ret->prev = dtd->last; + dtd->last = (xmlNodePtr) ret; + } + return(ret); +} + +/** + * xmlFreeAttributeTable: + * @table: An attribute table + * + * Deallocate the memory used by an entities hash table. + */ +void +xmlFreeAttributeTable(xmlAttributeTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeAttribute); +} + +/** + * xmlCopyAttribute: + * @attr: An attribute + * + * Build a copy of an attribute. + * + * Returns the new xmlAttributePtr or NULL in case of error. 
+ */ +static xmlAttributePtr +xmlCopyAttribute(xmlAttributePtr attr) { + xmlAttributePtr cur; + + cur = (xmlAttributePtr) xmlMalloc(sizeof(xmlAttribute)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyAttribute: out of memory !\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlAttribute)); + cur->type = XML_ATTRIBUTE_DECL; + cur->atype = attr->atype; + cur->def = attr->def; + cur->tree = xmlCopyEnumeration(attr->tree); + if (attr->elem != NULL) + cur->elem = xmlStrdup(attr->elem); + if (attr->name != NULL) + cur->name = xmlStrdup(attr->name); + if (attr->prefix != NULL) + cur->prefix = xmlStrdup(attr->prefix); + if (attr->defaultValue != NULL) + cur->defaultValue = xmlStrdup(attr->defaultValue); + return(cur); +} + +/** + * xmlCopyAttributeTable: + * @table: An attribute table + * + * Build a copy of an attribute table. + * + * Returns the new xmlAttributeTablePtr or NULL in case of error. + */ +xmlAttributeTablePtr +xmlCopyAttributeTable(xmlAttributeTablePtr table) { + return((xmlAttributeTablePtr) xmlHashCopy(table, + (xmlHashCopier) xmlCopyAttribute)); +} + +/** + * xmlDumpAttributeDecl: + * @buf: the XML buffer output + * @attr: An attribute declaration + * + * This will dump the content of the attribute declaration as an XML + * DTD definition + */ +void +xmlDumpAttributeDecl(xmlBufferPtr buf, xmlAttributePtr attr) { + xmlBufferWriteChar(buf, "<!ATTLIST "); + xmlBufferWriteCHAR(buf, attr->elem); + xmlBufferWriteChar(buf, " "); + if (attr->prefix != NULL) { + xmlBufferWriteCHAR(buf, attr->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, attr->name); + switch (attr->atype) { + case XML_ATTRIBUTE_CDATA: + xmlBufferWriteChar(buf, " CDATA"); + break; + case XML_ATTRIBUTE_ID: + xmlBufferWriteChar(buf, " ID"); + break; + case XML_ATTRIBUTE_IDREF: + xmlBufferWriteChar(buf, " IDREF"); + break; + case XML_ATTRIBUTE_IDREFS: + xmlBufferWriteChar(buf, " IDREFS"); + break; + case XML_ATTRIBUTE_ENTITY: + 
xmlBufferWriteChar(buf, " ENTITY"); + break; + case XML_ATTRIBUTE_ENTITIES: + xmlBufferWriteChar(buf, " ENTITIES"); + break; + case XML_ATTRIBUTE_NMTOKEN: + xmlBufferWriteChar(buf, " NMTOKEN"); + break; + case XML_ATTRIBUTE_NMTOKENS: + xmlBufferWriteChar(buf, " NMTOKENS"); + break; + case XML_ATTRIBUTE_ENUMERATION: + xmlBufferWriteChar(buf, " ("); + xmlDumpEnumeration(buf, attr->tree); + break; + case XML_ATTRIBUTE_NOTATION: + xmlBufferWriteChar(buf, " NOTATION ("); + xmlDumpEnumeration(buf, attr->tree); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpAttributeDecl: internal: unknown type %d\n", + attr->atype); + } + switch (attr->def) { + case XML_ATTRIBUTE_NONE: + break; + case XML_ATTRIBUTE_REQUIRED: + xmlBufferWriteChar(buf, " #REQUIRED"); + break; + case XML_ATTRIBUTE_IMPLIED: + xmlBufferWriteChar(buf, " #IMPLIED"); + break; + case XML_ATTRIBUTE_FIXED: + xmlBufferWriteChar(buf, " #FIXED"); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpAttributeDecl: internal: unknown default %d\n", + attr->def); + } + if (attr->defaultValue != NULL) { + xmlBufferWriteChar(buf, " "); + xmlBufferWriteQuotedString(buf, attr->defaultValue); + } + xmlBufferWriteChar(buf, ">\n"); +} + +/** + * xmlDumpAttributeTable: + * @buf: the XML buffer output + * @table: An attribute table + * + * This will dump the content of the attribute table as an XML DTD definition + */ +void +xmlDumpAttributeTable(xmlBufferPtr buf, xmlAttributeTablePtr table) { + xmlHashScan(table, (xmlHashScanner) xmlDumpAttributeDecl, buf); +} + +/************************************************************************ + * * + * NOTATIONs * + * * + ************************************************************************/ +/** + * xmlCreateNotationTable: + * + * create and initialize an empty notation hash table. + * + * Returns the xmlNotationTablePtr just created or NULL in case + * of error. 
+ */ +static xmlNotationTablePtr +xmlCreateNotationTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeNotation: + * @not: A notation + * + * Deallocate the memory used by an notation definition + */ +static void +xmlFreeNotation(xmlNotationPtr nota) { + if (nota == NULL) return; + if (nota->name != NULL) + xmlFree((xmlChar *) nota->name); + if (nota->PublicID != NULL) + xmlFree((xmlChar *) nota->PublicID); + if (nota->SystemID != NULL) + xmlFree((xmlChar *) nota->SystemID); + xmlFree(nota); +} + + +/** + * xmlAddNotationDecl: + * @dtd: pointer to the DTD + * @ctxt: the validation context + * @name: the entity name + * @PublicID: the public identifier or NULL + * @SystemID: the system identifier or NULL + * + * Register a new notation declaration + * + * Returns NULL if not, otherwise the entity + */ +xmlNotationPtr +xmlAddNotationDecl(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar *PublicID, const xmlChar *SystemID) { + xmlNotationPtr ret; + xmlNotationTablePtr table; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: dtd == NULL\n"); + return(NULL); + } + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: name == NULL\n"); + return(NULL); + } + if ((PublicID == NULL) && (SystemID == NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: no PUBLIC ID nor SYSTEM ID\n"); + return(NULL); + } + + /* + * Create the Notation table if needed. 
+ */ + table = (xmlNotationTablePtr) dtd->notations; + if (table == NULL) + dtd->notations = table = xmlCreateNotationTable(); + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlNotationPtr) xmlMalloc(sizeof(xmlNotation)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlNotation)); + + /* + * fill the structure. + */ + ret->name = xmlStrdup(name); + if (SystemID != NULL) + ret->SystemID = xmlStrdup(SystemID); + if (PublicID != NULL) + ret->PublicID = xmlStrdup(PublicID); + + /* + * Validity Check: + * Check the DTD for previous declarations of the ATTLIST + */ + if (xmlHashAddEntry(table, name, ret)) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: %s already defined\n", name); + xmlFreeNotation(ret); + return(NULL); + } + return(ret); +} + +/** + * xmlFreeNotationTable: + * @table: An notation table + * + * Deallocate the memory used by an entities hash table. + */ +void +xmlFreeNotationTable(xmlNotationTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeNotation); +} + +/** + * xmlCopyNotation: + * @nota: A notation + * + * Build a copy of a notation. + * + * Returns the new xmlNotationPtr or NULL in case of error. 
+ */ +static xmlNotationPtr +xmlCopyNotation(xmlNotationPtr nota) { + xmlNotationPtr cur; + + cur = (xmlNotationPtr) xmlMalloc(sizeof(xmlNotation)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyNotation: out of memory !\n"); + return(NULL); + } + if (nota->name != NULL) + cur->name = xmlStrdup(nota->name); + else + cur->name = NULL; + if (nota->PublicID != NULL) + cur->PublicID = xmlStrdup(nota->PublicID); + else + cur->PublicID = NULL; + if (nota->SystemID != NULL) + cur->SystemID = xmlStrdup(nota->SystemID); + else + cur->SystemID = NULL; + return(cur); +} + +/** + * xmlCopyNotationTable: + * @table: A notation table + * + * Build a copy of a notation table. + * + * Returns the new xmlNotationTablePtr or NULL in case of error. + */ +xmlNotationTablePtr +xmlCopyNotationTable(xmlNotationTablePtr table) { + return((xmlNotationTablePtr) xmlHashCopy(table, + (xmlHashCopier) xmlCopyNotation)); +} + +/** + * xmlDumpNotationDecl: + * @buf: the XML buffer output + * @nota: A notation declaration + * + * This will dump the content the notation declaration as an XML DTD definition + */ +void +xmlDumpNotationDecl(xmlBufferPtr buf, xmlNotationPtr nota) { + xmlBufferWriteChar(buf, "<!NOTATION "); + xmlBufferWriteCHAR(buf, nota->name); + if (nota->PublicID != NULL) { + xmlBufferWriteChar(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf, nota->PublicID); + if (nota->SystemID != NULL) { + xmlBufferWriteChar(buf, " "); + xmlBufferWriteCHAR(buf, nota->SystemID); + } + } else { + xmlBufferWriteChar(buf, " SYSTEM "); + xmlBufferWriteCHAR(buf, nota->SystemID); + } + xmlBufferWriteChar(buf, " >\n"); +} + +/** + * xmlDumpNotationTable: + * @buf: the XML buffer output + * @table: A notation table + * + * This will dump the content of the notation table as an XML DTD definition + */ +void +xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) { + xmlHashScan(table, (xmlHashScanner) xmlDumpNotationDecl, buf); +} + 
+/************************************************************************ + * * + * IDs * + * * + ************************************************************************/ +/** + * xmlCreateIDTable: + * + * create and initialize an empty id hash table. + * + * Returns the xmlIDTablePtr just created or NULL in case + * of error. + */ +static xmlIDTablePtr +xmlCreateIDTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeID: + * @not: A id + * + * Deallocate the memory used by an id definition + */ +static void +xmlFreeID(xmlIDPtr id) { + if (id == NULL) return; + if (id->value != NULL) + xmlFree((xmlChar *) id->value); + xmlFree(id); +} + +/** + * xmlAddID: + * @ctxt: the validation context + * @doc: pointer to the document + * @value: the value name + * @attr: the attribute holding the ID + * + * Register a new id declaration + * + * Returns NULL if not, otherwise the new xmlIDPtr + */ +xmlIDPtr +xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, + xmlAttrPtr attr) { + xmlIDPtr ret; + xmlIDTablePtr table; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: doc == NULL\n"); + return(NULL); + } + if (value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: value == NULL\n"); + return(NULL); + } + if (attr == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: attr == NULL\n"); + return(NULL); + } + + /* + * Create the ID table if needed. + */ + table = (xmlIDTablePtr) doc->ids; + if (table == NULL) + doc->ids = table = xmlCreateIDTable(); + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlIDPtr) xmlMalloc(sizeof(xmlID)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: out of memory\n"); + return(NULL); + } + + /* + * fill the structure. 
+ */ + ret->value = xmlStrdup(value); + ret->attr = attr; + + if (xmlHashAddEntry(table, value, ret) < 0) { + /* + * The id is already defined in this DTD. + */ + if (ctxt != NULL) { + VECTXT(ctxt, attr->parent); + VERROR(ctxt->userData, "ID %s already defined\n", value); + } + xmlFreeID(ret); + return(NULL); + } + return(ret); +} + +/** + * xmlFreeIDTable: + * @table: An id table + * + * Deallocate the memory used by an ID hash table. + */ +void +xmlFreeIDTable(xmlIDTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeID); +} + +/** + * xmlIsID: + * @doc: the document + * @elem: the element carrying the attribute + * @attr: the attribute + * + * Determine whether an attribute is of type ID. In case we have DTD(s) + * then this is done if DTD loading has been requested. In the case + * of HTML documents parsed with the HTML parser, then ID detection is + * done systematically. + * + * Returns 0 or 1 depending on the lookup result + */ +int +xmlIsID(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) { + if (doc == NULL) return(0); + if (attr == NULL) return(0); + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) { + return(0); + } else if (doc->type == XML_HTML_DOCUMENT_NODE) { + if ((xmlStrEqual(BAD_CAST "id", attr->name)) || + (xmlStrEqual(BAD_CAST "name", attr->name))) + return(1); + return(0); + } else { + xmlAttributePtr attrDecl; + + if (elem == NULL) return(0); + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + /* + * TODO: this sucks ... 
recomputing this every time is stupid + */ + int len = xmlStrlen(elem->name) + xmlStrlen(elem->ns->prefix) + 2; + xmlChar *fullname; + + fullname = xmlMalloc(len); + if (fullname == NULL) + return(0); + snprintf((char *) fullname, len, "%s:%s", (char *) elem->ns->prefix, + (char *) elem->name); + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, fullname, + attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, fullname, + attr->name); + xmlFree(fullname); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, + attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, + attr->name); + } + + if ((attrDecl != NULL) && (attrDecl->atype == XML_ATTRIBUTE_ID)) + return(1); + } + return(0); +} + +/** + * xmlRemoveID: + * @doc: the document + * @attr: the attribute + * + * Remove the given attribute from the ID table maintained internally. + * + * Returns -1 if the lookup failed and 0 otherwise + */ +int +xmlRemoveID(xmlDocPtr doc, xmlAttrPtr attr) { + xmlAttrPtr cur; + xmlIDTablePtr table; + xmlChar *ID; + + if (doc == NULL) return(-1); + if (attr == NULL) return(-1); + table = (xmlIDTablePtr) doc->ids; + if (table == NULL) + return(-1); + + if (attr == NULL) + return(-1); + ID = xmlNodeListGetString(doc, attr->children, 1); + if (ID == NULL) + return(-1); + cur = xmlHashLookup(table, ID); + if (cur != attr) { + xmlFree(ID); + return(-1); + } + xmlHashUpdateEntry(table, ID, NULL, (xmlHashDeallocator) xmlFreeID); + xmlFree(ID); + return(0); +} + +/** + * xmlGetID: + * @doc: pointer to the document + * @ID: the ID value + * + * Search the attribute declaring the given ID + * + * Returns NULL if not found, otherwise the xmlAttrPtr defining the ID + */ +xmlAttrPtr +xmlGetID(xmlDocPtr doc, const xmlChar *ID) { + xmlIDTablePtr table; + xmlIDPtr id; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetID: doc == NULL\n"); + 
return(NULL); + } + + if (ID == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetID: ID == NULL\n"); + return(NULL); + } + + table = (xmlIDTablePtr) doc->ids; + if (table == NULL) + return(NULL); + + id = xmlHashLookup(table, ID); + if (id == NULL) + return(NULL); + return(id->attr); +} + +/************************************************************************ + * * + * Refs * + * * + ************************************************************************/ +typedef struct xmlRemoveMemo_t +{ + xmlListPtr l; + xmlAttrPtr ap; +} xmlRemoveMemo; + +typedef xmlRemoveMemo *xmlRemoveMemoPtr; + +typedef struct xmlValidateMemo_t +{ + xmlValidCtxtPtr ctxt; + const xmlChar *name; +} xmlValidateMemo; + +typedef xmlValidateMemo *xmlValidateMemoPtr; + +/** + * xmlCreateRefTable: + * + * create and initialize an empty ref hash table. + * + * Returns the xmlRefTablePtr just created or NULL in case + * of error. + */ +static xmlRefTablePtr +xmlCreateRefTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeRef: + * @lk: A list link + * + * Deallocate the memory used by a ref definition + */ +static void +xmlFreeRef(xmlLinkPtr lk) { + xmlRefPtr ref = (xmlRefPtr)xmlLinkGetData(lk); + if (ref == NULL) return; + if (ref->value != NULL) + xmlFree((xmlChar *)ref->value); + xmlFree(ref); +} + +/** + * xmlFreeRefList: + * @list_ref: A list of references. 
+ * + * Deallocate the memory used by a list of references + */ +static void +xmlFreeRefList(xmlListPtr list_ref) { + if (list_ref == NULL) return; + xmlListDelete(list_ref); +} + +/** + * xmlWalkRemoveRef: + * @data: Contents of current link + * @user: Value supplied by the user + * + * Returns 0 to abort the walk or 1 to continue + */ +static int +xmlWalkRemoveRef(const void *data, const void *user) +{ + xmlAttrPtr attr0 = ((xmlRefPtr)data)->attr; + xmlAttrPtr attr1 = ((xmlRemoveMemoPtr)user)->ap; + xmlListPtr ref_list = ((xmlRemoveMemoPtr)user)->l; + + if (attr0 == attr1) { /* Matched: remove and terminate walk */ + xmlListRemoveFirst(ref_list, (void *)data); + return 0; + } + return 1; +} + +/** + * xmlAddRef: + * @ctxt: the validation context + * @doc: pointer to the document + * @value: the value name + * @attr: the attribute holding the Ref + * + * Register a new ref declaration + * + * Returns NULL if not, otherwise the new xmlRefPtr + */ +xmlRefPtr +xmlAddRef(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlDocPtr doc, const xmlChar *value, + xmlAttrPtr attr) { + xmlRefPtr ret; + xmlRefTablePtr table; + xmlListPtr ref_list; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: doc == NULL\n"); + return(NULL); + } + if (value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: value == NULL\n"); + return(NULL); + } + if (attr == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: attr == NULL\n"); + return(NULL); + } + + /* + * Create the Ref table if needed. + */ + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + doc->refs = table = xmlCreateRefTable(); + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlRefPtr) xmlMalloc(sizeof(xmlRef)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: out of memory\n"); + return(NULL); + } + + /* + * fill the structure. 
+ */ + ret->value = xmlStrdup(value); + ret->attr = attr; + + /* To add a reference :- + * References are maintained as a list of references, + * Lookup the entry, if no entry create new nodelist + * Add the owning node to the NodeList + * Return the ref + */ + + if (NULL == (ref_list = xmlHashLookup(table, value))) { + if (NULL == (ref_list = xmlListCreate(xmlFreeRef, NULL))) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: Reference list creation failed!\n"); + return(NULL); + } + if (xmlHashAddEntry(table, value, ref_list) < 0) { + xmlListDelete(ref_list); + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: Reference list insertion failed!\n"); + return(NULL); + } + } + xmlListInsert(ref_list, ret); + return(ret); +} + +/** + * xmlFreeRefTable: + * @table: An ref table + * + * Deallocate the memory used by an Ref hash table. + */ +void +xmlFreeRefTable(xmlRefTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeRefList); +} + +/** + * xmlIsRef: + * @doc: the document + * @elem: the element carrying the attribute + * @attr: the attribute + * + * Determine whether an attribute is of type Ref. In case we have DTD(s) + * then this is simple, otherwise we use an heuristic: name Ref (upper + * or lowercase). 
+ * + * Returns 0 or 1 depending on the lookup result + */ +int +xmlIsRef(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) { + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) { + return(0); + } else if (doc->type == XML_HTML_DOCUMENT_NODE) { + /* TODO @@@ */ + return(0); + } else { + xmlAttributePtr attrDecl; + + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, + elem->name, attr->name); + + if ((attrDecl != NULL) && + (attrDecl->atype == XML_ATTRIBUTE_IDREF || + attrDecl->atype == XML_ATTRIBUTE_IDREFS)) + return(1); + } + return(0); +} + +/** + * xmlRemoveRef: + * @doc: the document + * @attr: the attribute + * + * Remove the given attribute from the Ref table maintained internally. + * + * Returns -1 if the lookup failed and 0 otherwise + */ +int +xmlRemoveRef(xmlDocPtr doc, xmlAttrPtr attr) { + xmlListPtr ref_list; + xmlRefTablePtr table; + xmlChar *ID; + xmlRemoveMemo target; + + if (doc == NULL) return(-1); + if (attr == NULL) return(-1); + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + return(-1); + + if (attr == NULL) + return(-1); + ID = xmlNodeListGetString(doc, attr->children, 1); + if (ID == NULL) + return(-1); + ref_list = xmlHashLookup(table, ID); + + if(ref_list == NULL) { + xmlFree(ID); + return (-1); + } + /* At this point, ref_list refers to a list of references which + * have the same key as the supplied attr. Our list of references + * is ordered by reference address and we don't have that information + * here to use when removing. We'll have to walk the list and + * check for a matching attribute, when we find one stop the walk + * and remove the entry. + * The list is ordered by reference, so that means we don't have the + * key. Passing the list and the reference to the walker means we + * will have enough data to be able to remove the entry. 
+ */ + target.l = ref_list; + target.ap = attr; + + /* Remove the supplied attr from our list */ + xmlListWalk(ref_list, xmlWalkRemoveRef, &target); + + /*If the list is empty then remove the list entry in the hash */ + if (xmlListEmpty(ref_list)) + xmlHashUpdateEntry(table, ID, NULL, (xmlHashDeallocator) + xmlFreeRefList); + xmlFree(ID); + return(0); +} + +/** + * xmlGetRefs: + * @doc: pointer to the document + * @ID: the ID value + * + * Find the set of references for the supplied ID. + * + * Returns NULL if not found, otherwise node set for the ID. + */ +xmlListPtr +xmlGetRefs(xmlDocPtr doc, const xmlChar *ID) { + xmlRefTablePtr table; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetRefs: doc == NULL\n"); + return(NULL); + } + + if (ID == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetRefs: ID == NULL\n"); + return(NULL); + } + + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + return(NULL); + + return (xmlHashLookup(table, ID)); +} + +/************************************************************************ + * * + * Routines for validity checking * + * * + ************************************************************************/ + +/** + * xmlGetDtdElementDesc: + * @dtd: a pointer to the DtD to search + * @name: the element name + * + * Search the DTD for the description of this element + * + * returns the xmlElementPtr if found or NULL + */ + +xmlElementPtr +xmlGetDtdElementDesc(xmlDtdPtr dtd, const xmlChar *name) { + xmlElementTablePtr table; + xmlElementPtr cur; + xmlChar *uqname = NULL, *prefix = NULL; + + if (dtd == NULL) return(NULL); + if (dtd->elements == NULL) + return(NULL); + table = (xmlElementTablePtr) dtd->elements; + + uqname = xmlSplitQName2(name, &prefix); + if (uqname != NULL) + name = uqname; + cur = xmlHashLookup2(table, name, prefix); + if (prefix != NULL) xmlFree(prefix); + if (uqname != NULL) xmlFree(uqname); + return(cur); +} +/** + * xmlGetDtdElementDesc2: + * @dtd: a pointer to the DtD to 
search + * @name: the element name + * @create: create an empty description if not found + * + * Search the DTD for the description of this element + * + * returns the xmlElementPtr if found or NULL + */ + +static xmlElementPtr +xmlGetDtdElementDesc2(xmlDtdPtr dtd, const xmlChar *name, int create) { + xmlElementTablePtr table; + xmlElementPtr cur; + xmlChar *uqname = NULL, *prefix = NULL; + + if (dtd == NULL) return(NULL); + if (dtd->elements == NULL) { + if (!create) + return(NULL); + /* + * Create the Element table if needed. + */ + table = (xmlElementTablePtr) dtd->elements; + if (table == NULL) { + table = xmlCreateElementTable(); + dtd->elements = (void *) table; + } + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlGetDtdElementDesc2: Table creation failed!\n"); + return(NULL); + } + } + table = (xmlElementTablePtr) dtd->elements; + + uqname = xmlSplitQName2(name, &prefix); + if (uqname != NULL) + name = uqname; + cur = xmlHashLookup2(table, name, prefix); + if ((cur == NULL) && (create)) { + cur = (xmlElementPtr) xmlMalloc(sizeof(xmlElement)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlGetDtdElementDesc2: out of memory\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlElement)); + cur->type = XML_ELEMENT_DECL; + + /* + * fill the structure. 
+ */ + cur->name = xmlStrdup(name); + cur->prefix = xmlStrdup(prefix); + cur->etype = XML_ELEMENT_TYPE_UNDEFINED; + + xmlHashAddEntry2(table, name, prefix, cur); + } + if (prefix != NULL) xmlFree(prefix); + if (uqname != NULL) xmlFree(uqname); + return(cur); +} + +/** + * xmlGetDtdQElementDesc: + * @dtd: a pointer to the DtD to search + * @name: the element name + * @prefix: the element namespace prefix + * + * Search the DTD for the description of this element + * + * returns the xmlElementPtr if found or NULL + */ + +xmlElementPtr +xmlGetDtdQElementDesc(xmlDtdPtr dtd, const xmlChar *name, + const xmlChar *prefix) { + xmlElementTablePtr table; + + if (dtd == NULL) return(NULL); + if (dtd->elements == NULL) return(NULL); + table = (xmlElementTablePtr) dtd->elements; + + return(xmlHashLookup2(table, name, prefix)); +} + +/** + * xmlGetDtdAttrDesc: + * @dtd: a pointer to the DtD to search + * @elem: the element name + * @name: the attribute name + * + * Search the DTD for the description of this attribute on + * this element. 
+ * + * returns the xmlAttributePtr if found or NULL + */ + +xmlAttributePtr +xmlGetDtdAttrDesc(xmlDtdPtr dtd, const xmlChar *elem, const xmlChar *name) { + xmlAttributeTablePtr table; + xmlAttributePtr cur; + xmlChar *uqname = NULL, *prefix = NULL; + + if (dtd == NULL) return(NULL); + if (dtd->attributes == NULL) return(NULL); + + table = (xmlAttributeTablePtr) dtd->attributes; + if (table == NULL) + return(NULL); + + uqname = xmlSplitQName2(name, &prefix); + + if (uqname != NULL) { + cur = xmlHashLookup3(table, uqname, prefix, elem); + if (prefix != NULL) xmlFree(prefix); + if (uqname != NULL) xmlFree(uqname); + } else + cur = xmlHashLookup3(table, name, NULL, elem); + return(cur); +} + +/** + * xmlGetDtdQAttrDesc: + * @dtd: a pointer to the DtD to search + * @elem: the element name + * @name: the attribute name + * @prefix: the attribute namespace prefix + * + * Search the DTD for the description of this qualified attribute on + * this element. + * + * returns the xmlAttributePtr if found or NULL + */ + +xmlAttributePtr +xmlGetDtdQAttrDesc(xmlDtdPtr dtd, const xmlChar *elem, const xmlChar *name, + const xmlChar *prefix) { + xmlAttributeTablePtr table; + + if (dtd == NULL) return(NULL); + if (dtd->attributes == NULL) return(NULL); + table = (xmlAttributeTablePtr) dtd->attributes; + + return(xmlHashLookup3(table, name, prefix, elem)); +} + +/** + * xmlGetDtdNotationDesc: + * @dtd: a pointer to the DtD to search + * @name: the notation name + * + * Search the DTD for the description of this notation + * + * returns the xmlNotationPtr if found or NULL + */ + +xmlNotationPtr +xmlGetDtdNotationDesc(xmlDtdPtr dtd, const xmlChar *name) { + xmlNotationTablePtr table; + + if (dtd == NULL) return(NULL); + if (dtd->notations == NULL) return(NULL); + table = (xmlNotationTablePtr) dtd->notations; + + return(xmlHashLookup(table, name)); +} + +/** + * xmlValidateNotationUse: + * @ctxt: the validation context + * @doc: the document + * @notationName: the notation name to check + 
* + * Validate that the given name match a notation declaration. + * - [ VC: Notation Declared ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNotationUse(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + const xmlChar *notationName) { + xmlNotationPtr notaDecl; + if ((doc == NULL) || (doc->intSubset == NULL)) return(-1); + + notaDecl = xmlGetDtdNotationDesc(doc->intSubset, notationName); + if ((notaDecl == NULL) && (doc->extSubset != NULL)) + notaDecl = xmlGetDtdNotationDesc(doc->extSubset, notationName); + + if (notaDecl == NULL) { + VERROR(ctxt->userData, "NOTATION %s is not declared\n", + notationName); + return(0); + } + return(1); +} + +/** + * xmlIsMixedElement: + * @doc: the document + * @name: the element name + * + * Search in the DtDs whether an element accept Mixed content (or ANY) + * basically if it is supposed to accept text childs + * + * returns 0 if no, 1 if yes, and -1 if no element description is available + */ + +int +xmlIsMixedElement(xmlDocPtr doc, const xmlChar *name) { + xmlElementPtr elemDecl; + + if ((doc == NULL) || (doc->intSubset == NULL)) return(-1); + + elemDecl = xmlGetDtdElementDesc(doc->intSubset, name); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) + elemDecl = xmlGetDtdElementDesc(doc->extSubset, name); + if (elemDecl == NULL) return(-1); + switch (elemDecl->etype) { + case XML_ELEMENT_TYPE_UNDEFINED: + return(-1); + case XML_ELEMENT_TYPE_ELEMENT: + return(0); + case XML_ELEMENT_TYPE_EMPTY: + /* + * return 1 for EMPTY since we want VC error to pop up + * on <empty> </empty> for example + */ + case XML_ELEMENT_TYPE_ANY: + case XML_ELEMENT_TYPE_MIXED: + return(1); + } + return(1); +} + +/** + * xmlValidateNameValue: + * @value: an Name value + * + * Validate that the given value match Name production + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNameValue(const xmlChar *value) { + const xmlChar *cur; + int val, len; + + if (value == NULL) return(0); + cur = value; + val = 
xmlStringCurrentChar(NULL, cur, &len); + cur += len; + if (!IS_LETTER(val) && (val != '_') && + (val != ':')) { + return(0); + } + + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (val != 0) return(0); + + return(1); +} + +/** + * xmlValidateNamesValue: + * @value: an Names value + * + * Validate that the given value match Names production + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNamesValue(const xmlChar *value) { + const xmlChar *cur; + int val, len; + + if (value == NULL) return(0); + cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + if (!IS_LETTER(val) && (val != '_') && + (val != ':')) { + return(0); + } + + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + while (IS_BLANK(val)) { + while (IS_BLANK(val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (!IS_LETTER(val) && (val != '_') && + (val != ':')) { + return(0); + } + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + } + + if (val != 0) return(0); + + return(1); +} + +/** + * xmlValidateNmtokenValue: + * @value: an Nmtoken value + * + * Validate that the given value match Nmtoken production + * + * [ VC: Name Token ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNmtokenValue(const xmlChar 
*value) { + const xmlChar *cur; + int val, len; + + if (value == NULL) return(0); + cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + if (!IS_LETTER(val) && !IS_DIGIT(val) && + (val != '.') && (val != '-') && + (val != '_') && (val != ':') && + (!IS_COMBINING(val)) && + (!IS_EXTENDER(val))) + return(0); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (val != 0) return(0); + + return(1); +} + +/** + * xmlValidateNmtokensValue: + * @value: an Nmtokens value + * + * Validate that the given value match Nmtokens production + * + * [ VC: Name Token ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNmtokensValue(const xmlChar *value) { + const xmlChar *cur; + int val, len; + + if (value == NULL) return(0); + cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + while (IS_BLANK(val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (!IS_LETTER(val) && !IS_DIGIT(val) && + (val != '.') && (val != '-') && + (val != '_') && (val != ':') && + (!IS_COMBINING(val)) && + (!IS_EXTENDER(val))) + return(0); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + while (IS_BLANK(val)) { + while (IS_BLANK(val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + if (val == 0) return(1); + + if (!IS_LETTER(val) && !IS_DIGIT(val) && + (val != '.') && (val != '-') && + (val != '_') && (val != ':') && + (!IS_COMBINING(val)) && + (!IS_EXTENDER(val))) + return(0); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + 
(IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + } + + if (val != 0) return(0); + + return(1); +} + +/** + * xmlValidateNotationDecl: + * @ctxt: the validation context + * @doc: a document instance + * @nota: a notation definition + * + * Try to validate a single notation definition + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - it seems that no validity constraint exists on notation declarations + * But this function get called anyway ... + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNotationDecl(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlDocPtr doc ATTRIBUTE_UNUSED, + xmlNotationPtr nota ATTRIBUTE_UNUSED) { + int ret = 1; + + return(ret); +} + +/** + * xmlValidateAttributeValue: + * @type: an attribute type + * @value: an attribute value + * + * Validate that the given attribute value match the proper production + * + * [ VC: ID ] + * Values of type ID must match the Name production.... + * + * [ VC: IDREF ] + * Values of type IDREF must match the Name production, and values + * of type IDREFS must match Names ... + * + * [ VC: Entity Name ] + * Values of type ENTITY must match the Name production, values + * of type ENTITIES must match Names ... + * + * [ VC: Name Token ] + * Values of type NMTOKEN must match the Nmtoken production; values + * of type NMTOKENS must match Nmtokens. 
+ * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateAttributeValue(xmlAttributeType type, const xmlChar *value) { + switch (type) { + case XML_ATTRIBUTE_ENTITIES: + case XML_ATTRIBUTE_IDREFS: + return(xmlValidateNamesValue(value)); + case XML_ATTRIBUTE_ENTITY: + case XML_ATTRIBUTE_IDREF: + case XML_ATTRIBUTE_ID: + case XML_ATTRIBUTE_NOTATION: + return(xmlValidateNameValue(value)); + case XML_ATTRIBUTE_NMTOKENS: + case XML_ATTRIBUTE_ENUMERATION: + return(xmlValidateNmtokensValue(value)); + case XML_ATTRIBUTE_NMTOKEN: + return(xmlValidateNmtokenValue(value)); + case XML_ATTRIBUTE_CDATA: + break; + } + return(1); +} + +/** + * xmlValidateAttributeValue2: + * @ctxt: the validation context + * @doc: the document + * @name: the attribute name (used for error reporting only) + * @type: the attribute type + * @value: the attribute value + * + * Validate that the given attribute value match a given type. + * This typically cannot be done before having finished parsing + * the subsets. + * + * [ VC: IDREF ] + * Values of type IDREF must match one of the declared IDs + * Values of type IDREFS must match a sequence of the declared IDs + * each Name must match the value of an ID attribute on some element + * in the XML document; i.e. IDREF values must match the value of + * some ID attribute + * + * [ VC: Entity Name ] + * Values of type ENTITY must match one declared entity + * Values of type ENTITIES must match a sequence of declared entities + * + * [ VC: Notation Attributes ] + * all notation names in the declaration must be declared. 
+ * + * returns 1 if valid or 0 otherwise + */ + +static int +xmlValidateAttributeValue2(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + const xmlChar *name, xmlAttributeType type, const xmlChar *value) { + int ret = 1; + switch (type) { + case XML_ATTRIBUTE_IDREFS: + case XML_ATTRIBUTE_IDREF: + case XML_ATTRIBUTE_ID: + case XML_ATTRIBUTE_NMTOKENS: + case XML_ATTRIBUTE_ENUMERATION: + case XML_ATTRIBUTE_NMTOKEN: + case XML_ATTRIBUTE_CDATA: + break; + case XML_ATTRIBUTE_ENTITY: { + xmlEntityPtr ent; + + ent = xmlGetDocEntity(doc, value); + if ((ent == NULL) && (doc->standalone == 1)) { + doc->standalone = 0; + ent = xmlGetDocEntity(doc, value); + if (ent != NULL) { + VERROR(ctxt->userData, +"standalone problem: attribute %s reference entity \"%s\" in external subset\n", + name, value); + /* WAIT to get answer from the Core WG on this + ret = 0; + */ + } + } + if (ent == NULL) { + VERROR(ctxt->userData, + "ENTITY attribute %s reference an unknown entity \"%s\"\n", + name, value); + ret = 0; + } else if (ent->etype != XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + VERROR(ctxt->userData, + "ENTITY attribute %s reference an entity \"%s\" of wrong type\n", + name, value); + ret = 0; + } + break; + } + case XML_ATTRIBUTE_ENTITIES: { + xmlChar *dup, *nam = NULL, *cur, save; + xmlEntityPtr ent; + + dup = xmlStrdup(value); + if (dup == NULL) + return(0); + cur = dup; + while (*cur != 0) { + nam = cur; + while ((*cur != 0) && (!IS_BLANK(*cur))) cur++; + save = *cur; + *cur = 0; + ent = xmlGetDocEntity(doc, nam); + if (ent == NULL) { + VERROR(ctxt->userData, + "ENTITIES attribute %s reference an unknown entity \"%s\"\n", + name, nam); + ret = 0; + } else if (ent->etype != XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + VERROR(ctxt->userData, + "ENTITIES attribute %s reference an entity \"%s\" of wrong type\n", + name, nam); + ret = 0; + } + if (save == 0) + break; + *cur = save; + while (IS_BLANK(*cur)) cur++; + } + xmlFree(dup); + break; + } + case XML_ATTRIBUTE_NOTATION: { + xmlNotationPtr nota; 
+ + nota = xmlGetDtdNotationDesc(doc->intSubset, value); + if ((nota == NULL) && (doc->extSubset != NULL)) + nota = xmlGetDtdNotationDesc(doc->extSubset, value); + + if (nota == NULL) { + VERROR(ctxt->userData, + "NOTATION attribute %s reference an unknown notation \"%s\"\n", + name, value); + ret = 0; + } + break; + } + } + return(ret); +} + +/** + * xmlValidCtxtNormalizeAttributeValue: + * @ctxt: the validation context + * @doc: the document + * @elem: the parent + * @name: the attribute name + * @value: the attribute value + * @ctxt: the validation context or NULL + * + * Does the validation related extra step of the normalization of attribute + * values: + * + * If the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by single space (#x20) character. + * + * Also check VC: Standalone Document Declaration in P32, and update + * ctxt->valid accordingly + * + * returns a new normalized string if normalization is needed, NULL otherwise + * the caller must free the returned value. 
+ */ + +xmlChar * +xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, const xmlChar *name, const xmlChar *value) { + xmlChar *ret, *dst; + const xmlChar *src; + xmlAttributePtr attrDecl = NULL; + int extsubset = 0; + + if (doc == NULL) return(NULL); + if (elem == NULL) return(NULL); + if (name == NULL) return(NULL); + if (value == NULL) return(NULL); + + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + elem->ns->prefix, elem->name); + qname[sizeof(qname) - 1] = 0; + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) { + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, name); + if (attrDecl != NULL) + extsubset = 1; + } + } + if ((attrDecl == NULL) && (doc->intSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) { + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, name); + if (attrDecl != NULL) + extsubset = 1; + } + + if (attrDecl == NULL) + return(NULL); + if (attrDecl->atype == XML_ATTRIBUTE_CDATA) + return(NULL); + + ret = xmlStrdup(value); + if (ret == NULL) + return(NULL); + src = value; + dst = ret; + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; + if ((doc->standalone) && (extsubset == 1) && (!xmlStrEqual(value, ret))) { + VERROR(ctxt->userData, +"standalone: %s on %s value had to be normalized based on external subset declaration\n", + name, elem->name); + ctxt->valid = 0; + } + return(ret); +} + +/** + * xmlValidNormalizeAttributeValue: + * @doc: the document + * @elem: the parent + * @name: the attribute name + * @value: the attribute value + * + * Does the validation related extra step of the normalization of attribute + * values: + * + * If 
the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by single space (#x20) character. + * + * returns a new normalized string if normalization is needed, NULL otherwise + * the caller must free the returned value. + */ + +xmlChar * +xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem, + const xmlChar *name, const xmlChar *value) { + xmlChar *ret, *dst; + const xmlChar *src; + xmlAttributePtr attrDecl = NULL; + + if (doc == NULL) return(NULL); + if (elem == NULL) return(NULL); + if (name == NULL) return(NULL); + if (value == NULL) return(NULL); + + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + elem->ns->prefix, elem->name); + qname[sizeof(qname) - 1] = 0; + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, name); + } + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, name); + + if (attrDecl == NULL) + return(NULL); + if (attrDecl->atype == XML_ATTRIBUTE_CDATA) + return(NULL); + + ret = xmlStrdup(value); + if (ret == NULL) + return(NULL); + src = value; + dst = ret; + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; + return(ret); +} + +static void +xmlValidateAttributeIdCallback(xmlAttributePtr attr, int *count, + const xmlChar* name ATTRIBUTE_UNUSED) { + if (attr->atype == XML_ATTRIBUTE_ID) (*count)++; +} + +/** + * xmlValidateAttributeDecl: + * @ctxt: the validation context + * @doc: a document instance + * 
@attr: an attribute definition + * + * Try to validate a single attribute definition + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Attribute Default Legal ] + * - [ VC: Enumeration ] + * - [ VC: ID Attribute Default ] + * + * The ID/IDREF uniqueness and matching are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlAttributePtr attr) { + int ret = 1; + int val; + CHECK_DTD; + if(attr == NULL) return(1); + + /* Attribute Default Legal */ + /* Enumeration */ + if (attr->defaultValue != NULL) { + val = xmlValidateAttributeValue(attr->atype, attr->defaultValue); + if (val == 0) { + VERROR(ctxt->userData, + "Syntax of default value for attribute %s of %s is not valid\n", + attr->name, attr->elem); + } + ret &= val; + } + + /* ID Attribute Default */ + if ((attr->atype == XML_ATTRIBUTE_ID)&& + (attr->def != XML_ATTRIBUTE_IMPLIED) && + (attr->def != XML_ATTRIBUTE_REQUIRED)) { + VERROR(ctxt->userData, + "ID attribute %s of %s is not valid must be #IMPLIED or #REQUIRED\n", + attr->name, attr->elem); + ret = 0; + } + + /* One ID per Element Type */ + if (attr->atype == XML_ATTRIBUTE_ID) { + int nbId; + + /* the trick is that we parse DtD as their own internal subset */ + xmlElementPtr elem = xmlGetDtdElementDesc(doc->intSubset, + attr->elem); + if (elem != NULL) { + nbId = xmlScanIDAttributeDecl(NULL, elem); + } else { + xmlAttributeTablePtr table; + + /* + * The attribute may be declared in the internal subset and the + * element in the external subset. 
+ */ + nbId = 0; + table = (xmlAttributeTablePtr) doc->intSubset->attributes; + xmlHashScan3(table, NULL, NULL, attr->elem, (xmlHashScanner) + xmlValidateAttributeIdCallback, &nbId); + } + if (nbId > 1) { + VERROR(ctxt->userData, + "Element %s has %d ID attribute defined in the internal subset : %s\n", + attr->elem, nbId, attr->name); + } else if (doc->extSubset != NULL) { + int extId = 0; + elem = xmlGetDtdElementDesc(doc->extSubset, attr->elem); + if (elem != NULL) { + extId = xmlScanIDAttributeDecl(NULL, elem); + } + if (extId > 1) { + VERROR(ctxt->userData, + "Element %s has %d ID attribute defined in the external subset : %s\n", + attr->elem, extId, attr->name); + } else if (extId + nbId > 1) { + VERROR(ctxt->userData, +"Element %s has ID attributes defined in the internal and external subset : %s\n", + attr->elem, attr->name); + } + } + } + + /* Validity Constraint: Enumeration */ + if ((attr->defaultValue != NULL) && (attr->tree != NULL)) { + xmlEnumerationPtr tree = attr->tree; + while (tree != NULL) { + if (xmlStrEqual(tree->name, attr->defaultValue)) break; + tree = tree->next; + } + if (tree == NULL) { + VERROR(ctxt->userData, +"Default value \"%s\" for attribute %s of %s is not among the enumerated set\n", + attr->defaultValue, attr->name, attr->elem); + ret = 0; + } + } + + return(ret); +} + +/** + * xmlValidateElementDecl: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element definition + * + * Try to validate a single element definition + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: One ID per Element Type ] + * - [ VC: No Duplicate Types ] + * - [ VC: Unique Element Type Declaration ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateElementDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlElementPtr elem) { + int ret = 1; + xmlElementPtr tst; + + CHECK_DTD; + + if (elem == NULL) return(1); + +#if 0 +#ifdef LIBXML_REGEXP_ENABLED + /* Build the regexp 
associated to the content model */ + ret = xmlValidBuildContentModel(ctxt, elem); +#endif +#endif + + /* No Duplicate Types */ + if (elem->etype == XML_ELEMENT_TYPE_MIXED) { + xmlElementContentPtr cur, next; + const xmlChar *name; + + cur = elem->content; + while (cur != NULL) { + if (cur->type != XML_ELEMENT_CONTENT_OR) break; + if (cur->c1 == NULL) break; + if (cur->c1->type == XML_ELEMENT_CONTENT_ELEMENT) { + name = cur->c1->name; + next = cur->c2; + while (next != NULL) { + if (next->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(next->name, name)) { + VERROR(ctxt->userData, + "Definition of %s has duplicate references of %s\n", + elem->name, name); + ret = 0; + } + break; + } + if (next->c1 == NULL) break; + if (next->c1->type != XML_ELEMENT_CONTENT_ELEMENT) break; + if (xmlStrEqual(next->c1->name, name)) { + VERROR(ctxt->userData, + "Definition of %s has duplicate references of %s\n", + elem->name, name); + ret = 0; + } + next = next->c2; + } + } + cur = cur->c2; + } + } + + /* VC: Unique Element Type Declaration */ + tst = xmlGetDtdElementDesc(doc->intSubset, elem->name); + if ((tst != NULL ) && (tst != elem) && + ((tst->prefix == elem->prefix) || + (xmlStrEqual(tst->prefix, elem->prefix))) && + (tst->etype != XML_ELEMENT_TYPE_UNDEFINED)) { + VERROR(ctxt->userData, "Redefinition of element %s\n", + elem->name); + ret = 0; + } + tst = xmlGetDtdElementDesc(doc->extSubset, elem->name); + if ((tst != NULL ) && (tst != elem) && + ((tst->prefix == elem->prefix) || + (xmlStrEqual(tst->prefix, elem->prefix))) && + (tst->etype != XML_ELEMENT_TYPE_UNDEFINED)) { + VERROR(ctxt->userData, "Redefinition of element %s\n", + elem->name); + ret = 0; + } + /* One ID per Element Type + * already done when registering the attribute + if (xmlScanIDAttributeDecl(ctxt, elem) > 1) { + ret = 0; + } */ + return(ret); +} + +/** + * xmlValidateOneAttribute: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * @attr: an attribute 
instance + * @value: the attribute value (without entities processing) + * + * Try to validate a single attribute for an element + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Attribute Value Type ] + * - [ VC: Fixed Attribute Default ] + * - [ VC: Entity Name ] + * - [ VC: Name Token ] + * - [ VC: ID ] + * - [ VC: IDREF ] + * - [ VC: Entity Name ] + * - [ VC: Notation Attributes ] + * + * The ID/IDREF uniqueness and matching are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateOneAttribute(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, xmlAttrPtr attr, const xmlChar *value) { + /* xmlElementPtr elemDecl; */ + xmlAttributePtr attrDecl = NULL; + int val; + int ret = 1; + + CHECK_DTD; + if ((elem == NULL) || (elem->name == NULL)) return(0); + if ((attr == NULL) || (attr->name == NULL)) return(0); + + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + elem->ns->prefix, elem->name); + qname[sizeof(qname) - 1] = 0; + if (attr->ns != NULL) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, qname, + attr->name, attr->ns->prefix); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, qname, + attr->name, attr->ns->prefix); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, + qname, attr->name); + } + } + if (attrDecl == NULL) { + if (attr->ns != NULL) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, elem->name, + attr->name, attr->ns->prefix); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, elem->name, + attr->name, attr->ns->prefix); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, + elem->name, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + 
attrDecl = xmlGetDtdAttrDesc(doc->extSubset, + elem->name, attr->name); + } + } + + + /* Validity Constraint: Attribute Value Type */ + if (attrDecl == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "No declaration for attribute %s of element %s\n", + attr->name, elem->name); + return(0); + } + attr->atype = attrDecl->atype; + + val = xmlValidateAttributeValue(attrDecl->atype, value); + if (val == 0) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Syntax of value for attribute %s of %s is not valid\n", + attr->name, elem->name); + ret = 0; + } + + /* Validity constraint: Fixed Attribute Default */ + if (attrDecl->def == XML_ATTRIBUTE_FIXED) { + if (!xmlStrEqual(value, attrDecl->defaultValue)) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Value for attribute %s of %s is different from default \"%s\"\n", + attr->name, elem->name, attrDecl->defaultValue); + ret = 0; + } + } + + /* Validity Constraint: ID uniqueness */ + if (attrDecl->atype == XML_ATTRIBUTE_ID) { + if (xmlAddID(ctxt, doc, value, attr) == NULL) + ret = 0; + } + + if ((attrDecl->atype == XML_ATTRIBUTE_IDREF) || + (attrDecl->atype == XML_ATTRIBUTE_IDREFS)) { + if (xmlAddRef(ctxt, doc, value, attr) == NULL) + ret = 0; + } + + /* Validity Constraint: Notation Attributes */ + if (attrDecl->atype == XML_ATTRIBUTE_NOTATION) { + xmlEnumerationPtr tree = attrDecl->tree; + xmlNotationPtr nota; + + /* First check that the given NOTATION was declared */ + nota = xmlGetDtdNotationDesc(doc->intSubset, value); + if (nota == NULL) + nota = xmlGetDtdNotationDesc(doc->extSubset, value); + + if (nota == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Value \"%s\" for attribute %s of %s is not a declared Notation\n", + value, attr->name, elem->name); + ret = 0; + } + + /* Second, verify that it's among the list */ + while (tree != NULL) { + if (xmlStrEqual(tree->name, value)) break; + tree = tree->next; + } + if (tree == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, +"Value \"%s\" for 
attribute %s of %s is not among the enumerated notations\n", + value, attr->name, elem->name); + ret = 0; + } + } + + /* Validity Constraint: Enumeration */ + if (attrDecl->atype == XML_ATTRIBUTE_ENUMERATION) { + xmlEnumerationPtr tree = attrDecl->tree; + while (tree != NULL) { + if (xmlStrEqual(tree->name, value)) break; + tree = tree->next; + } + if (tree == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Value \"%s\" for attribute %s of %s is not among the enumerated set\n", + value, attr->name, elem->name); + ret = 0; + } + } + + /* Fixed Attribute Default */ + if ((attrDecl->def == XML_ATTRIBUTE_FIXED) && + (!xmlStrEqual(attrDecl->defaultValue, value))) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Value for attribute %s of %s must be \"%s\"\n", + attr->name, elem->name, attrDecl->defaultValue); + ret = 0; + } + + /* Extra check for the attribute value */ + ret &= xmlValidateAttributeValue2(ctxt, doc, attr->name, + attrDecl->atype, value); + + return(ret); +} + +/** + * xmlValidateOneNamespace: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * @prefix: the namespace prefix + * @ns: an namespace declaration instance + * @value: the attribute value (without entities processing) + * + * Try to validate a single namespace declaration for an element + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Attribute Value Type ] + * - [ VC: Fixed Attribute Default ] + * - [ VC: Entity Name ] + * - [ VC: Name Token ] + * - [ VC: ID ] + * - [ VC: IDREF ] + * - [ VC: Entity Name ] + * - [ VC: Notation Attributes ] + * + * The ID/IDREF uniqueness and matching are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateOneNamespace(xmlValidCtxtPtr ctxt, xmlDocPtr doc, +xmlNodePtr elem, const xmlChar *prefix, xmlNsPtr ns, const xmlChar *value) { + /* xmlElementPtr elemDecl; */ + xmlAttributePtr attrDecl = NULL; + int val; + int ret = 1; 
+ + CHECK_DTD; + if ((elem == NULL) || (elem->name == NULL)) return(0); + if ((ns == NULL) || (ns->href == NULL)) return(0); + + if (prefix != NULL) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + prefix, elem->name); + qname[sizeof(qname) - 1] = 0; + if (ns->prefix != NULL) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, qname, + ns->prefix, BAD_CAST "xmlns"); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, qname, + ns->prefix, BAD_CAST "xmlns"); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, + BAD_CAST "xmlns"); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, + BAD_CAST "xmlns"); + } + } + if (attrDecl == NULL) { + if (ns->prefix != NULL) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, elem->name, + ns->prefix, BAD_CAST "xmlns"); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, elem->name, + ns->prefix, BAD_CAST "xmlns"); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, + elem->name, BAD_CAST "xmlns"); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, + elem->name, BAD_CAST "xmlns"); + } + } + + + /* Validity Constraint: Attribute Value Type */ + if (attrDecl == NULL) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, + "No declaration for attribute xmlns:%s of element %s\n", + ns->prefix, elem->name); + } else { + VERROR(ctxt->userData, + "No declaration for attribute xmlns of element %s\n", + elem->name); + } + return(0); + } + + val = xmlValidateAttributeValue(attrDecl->atype, value); + if (val == 0) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, + "Syntax of value for attribute xmlns:%s of %s is not valid\n", + ns->prefix, elem->name); + } else { + VERROR(ctxt->userData, + "Syntax of value for attribute xmlns of %s is not valid\n", 
+ elem->name); + } + ret = 0; + } + + /* Validity constraint: Fixed Attribute Default */ + if (attrDecl->def == XML_ATTRIBUTE_FIXED) { + if (!xmlStrEqual(value, attrDecl->defaultValue)) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, + "Value for attribute xmlns:%s of %s is different from default \"%s\"\n", + ns->prefix, elem->name, attrDecl->defaultValue); + } else { + VERROR(ctxt->userData, + "Value for attribute xmlns of %s is different from default \"%s\"\n", + elem->name, attrDecl->defaultValue); + } + ret = 0; + } + } + + /* Validity Constraint: ID uniqueness */ + if (attrDecl->atype == XML_ATTRIBUTE_ID) { + if (xmlAddID(ctxt, doc, value, (xmlAttrPtr) ns) == NULL) + ret = 0; + } + + if ((attrDecl->atype == XML_ATTRIBUTE_IDREF) || + (attrDecl->atype == XML_ATTRIBUTE_IDREFS)) { + if (xmlAddRef(ctxt, doc, value, (xmlAttrPtr) ns) == NULL) + ret = 0; + } + + /* Validity Constraint: Notation Attributes */ + if (attrDecl->atype == XML_ATTRIBUTE_NOTATION) { + xmlEnumerationPtr tree = attrDecl->tree; + xmlNotationPtr nota; + + /* First check that the given NOTATION was declared */ + nota = xmlGetDtdNotationDesc(doc->intSubset, value); + if (nota == NULL) + nota = xmlGetDtdNotationDesc(doc->extSubset, value); + + if (nota == NULL) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, + "Value \"%s\" for attribute xmlns:%s of %s is not a declared Notation\n", + value, ns->prefix, elem->name); + } else { + VERROR(ctxt->userData, + "Value \"%s\" for attribute xmlns of %s is not a declared Notation\n", + value, elem->name); + } + ret = 0; + } + + /* Second, verify that it's among the list */ + while (tree != NULL) { + if (xmlStrEqual(tree->name, value)) break; + tree = tree->next; + } + if (tree == NULL) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, +"Value \"%s\" for attribute xmlns:%s of %s is not among the enumerated notations\n", + value, ns->prefix, elem->name); + } else { + 
VERROR(ctxt->userData, +"Value \"%s\" for attribute xmlns of %s is not among the enumerated notations\n", + value, elem->name); + } + ret = 0; + } + } + + /* Validity Constraint: Enumeration */ + if (attrDecl->atype == XML_ATTRIBUTE_ENUMERATION) { + xmlEnumerationPtr tree = attrDecl->tree; + while (tree != NULL) { + if (xmlStrEqual(tree->name, value)) break; + tree = tree->next; + } + if (tree == NULL) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, +"Value \"%s\" for attribute xmlns:%s of %s is not among the enumerated set\n", + value, ns->prefix, elem->name); + } else { + VERROR(ctxt->userData, +"Value \"%s\" for attribute xmlns of %s is not among the enumerated set\n", + value, elem->name); + } + ret = 0; + } + } + + /* Fixed Attribute Default */ + if ((attrDecl->def == XML_ATTRIBUTE_FIXED) && + (!xmlStrEqual(attrDecl->defaultValue, value))) { + VECTXT(ctxt, elem); + if (ns->prefix != NULL) { + VERROR(ctxt->userData, + "Value for attribute xmlns:%s of %s must be \"%s\"\n", + ns->prefix, elem->name, attrDecl->defaultValue); + } else { + VERROR(ctxt->userData, + "Value for attribute xmlns of %s must be \"%s\"\n", + elem->name, attrDecl->defaultValue); + } + ret = 0; + } + + /* Extra check for the attribute value */ + if (ns->prefix != NULL) { + ret &= xmlValidateAttributeValue2(ctxt, doc, ns->prefix, + attrDecl->atype, value); + } else { + ret &= xmlValidateAttributeValue2(ctxt, doc, BAD_CAST "xmlns", + attrDecl->atype, value); + } + + return(ret); +} + +#ifndef LIBXML_REGEXP_ENABLED +/** + * xmlValidateSkipIgnorable: + * @ctxt: the validation context + * @child: the child list + * + * Skip ignorable elements w.r.t. the validation process + * + * returns the first element to consider for validation of the content model + */ + +static xmlNodePtr +xmlValidateSkipIgnorable(xmlNodePtr child) { + while (child != NULL) { + switch (child->type) { + /* These things are ignored (skipped) during validation. 
*/
            case XML_PI_NODE:
            case XML_COMMENT_NODE:
            case XML_XINCLUDE_START:
            case XML_XINCLUDE_END:
                child = child->next;
                break;
            case XML_TEXT_NODE:
                /* Blank text is skipped, any other text stops the scan */
                if (xmlIsBlankNode(child))
                    child = child->next;
                else
                    return(child);
                break;
            /* keep current node */
            default:
                return(child);
        }
    }
    return(child);
}

/**
 * xmlValidateElementType:
 * @ctxt:  the validation context
 *
 * Try to validate the content model of an element; internal function.
 * The content model position, current node and bookkeeping values are
 * accessed through the CONT, NODE, DEPTH, OCCURS, OCCURRENCE and STATE
 * macros (defined elsewhere in this file; presumably they expand to
 * fields of ctxt->vstate -- TODO confirm). Alternatives are explored by
 * pushing states with vstateVPush() and backtracking with vstateVPop().
 *
 * returns 1 if valid, 0 if the content does not match (the rollback stack
 *         is exhausted), -1 in case of error, -2 if an entity reference
 *         is found and -3 if the validation succeeded but a rollback
 *         changed the current node, i.e. the content model is not
 *         deterministic.
 */

static int
xmlValidateElementType(xmlValidCtxtPtr ctxt) {
    int ret = -1;
    int determinist = 1;    /* switched to -3 when a rollback moves NODE */

    NODE = xmlValidateSkipIgnorable(NODE);
    /* No input left and no model left: trivially valid */
    if ((NODE == NULL) && (CONT == NULL))
        return(1);
    /* No input left but the whole model is optional ('*' or '?') */
    if ((NODE == NULL) &&
        ((CONT->ocur == XML_ELEMENT_CONTENT_MULT) ||
         (CONT->ocur == XML_ELEMENT_CONTENT_OPT))) {
        return(1);
    }
    if (CONT == NULL) return(-1);
    /* Entity references must be linearized by the caller first */
    if ((NODE != NULL) && (NODE->type == XML_ENTITY_REF_NODE))
        return(-2);

    /*
     * We arrive here when more states need to be examined
     */
cont:

    /*
     * We just recovered from a rollback generated by a possible
     * epsilon transition, go directly to the analysis phase
     */
    if (STATE == ROLLBACK_PARENT) {
        DEBUG_VALID_MSG("restored parent branch");
        DEBUG_VALID_STATE(NODE, CONT)
        ret = 1;
        goto analyze;
    }

    DEBUG_VALID_STATE(NODE, CONT)
    /*
     * we may have to save a backup state here. This is the equivalent
     * of handling epsilon transition in NFAs.
     */
    if ((CONT != NULL) &&
        ((CONT->parent == NULL) ||
         (CONT->parent->type != XML_ELEMENT_CONTENT_OR)) &&
        ((CONT->ocur == XML_ELEMENT_CONTENT_MULT) ||
         (CONT->ocur == XML_ELEMENT_CONTENT_OPT) ||
         ((CONT->ocur == XML_ELEMENT_CONTENT_PLUS) && (OCCURRENCE)))) {
        DEBUG_VALID_MSG("saving parent branch");
        /* NOTE(review): a push failure returns 0 here but -1 in the 'or' case below */
        if (vstateVPush(ctxt, CONT, NODE, DEPTH, OCCURS, ROLLBACK_PARENT) < 0)
            return(0);
    }


    /*
     * Check first if the content matches
     */
    switch (CONT->type) {
        case XML_ELEMENT_CONTENT_PCDATA:
            if (NODE == NULL) {
                DEBUG_VALID_MSG("pcdata failed no node");
                ret = 0;
                break;
            }
            if (NODE->type == XML_TEXT_NODE) {
                DEBUG_VALID_MSG("pcdata found, skip to next");
                /*
                 * go to next element in the content model
                 * skipping ignorable elems
                 */
                do {
                    NODE = NODE->next;
                    NODE = xmlValidateSkipIgnorable(NODE);
                    if ((NODE != NULL) &&
                        (NODE->type == XML_ENTITY_REF_NODE))
                        return(-2);
                } while ((NODE != NULL) &&
                         ((NODE->type != XML_ELEMENT_NODE) &&
                          (NODE->type != XML_TEXT_NODE) &&
                          (NODE->type != XML_CDATA_SECTION_NODE)));
                ret = 1;
                break;
            } else {
                DEBUG_VALID_MSG("pcdata failed");
                ret = 0;
                break;
            }
            break;
        case XML_ELEMENT_CONTENT_ELEMENT:
            if (NODE == NULL) {
                DEBUG_VALID_MSG("element failed no node");
                ret = 0;
                break;
            }
            /* The local name must match, then the prefixes must agree */
            ret = ((NODE->type == XML_ELEMENT_NODE) &&
                   (xmlStrEqual(NODE->name, CONT->name)));
            if (ret == 1) {
                if ((NODE->ns == NULL) || (NODE->ns->prefix == NULL)) {
                    ret = (CONT->prefix == NULL);
                } else if (CONT->prefix == NULL) {
                    ret = 0;
                } else {
                    ret = xmlStrEqual(NODE->ns->prefix, CONT->prefix);
                }
            }
            if (ret == 1) {
                DEBUG_VALID_MSG("element found, skip to next");
                /*
                 * go to next element in the content model
                 * skipping ignorable elems
                 */
                do {
                    NODE = NODE->next;
                    NODE = xmlValidateSkipIgnorable(NODE);
                    if ((NODE != NULL) &&
                        (NODE->type == XML_ENTITY_REF_NODE))
                        return(-2);
                } while ((NODE != NULL) &&
                         ((NODE->type != XML_ELEMENT_NODE) &&
                          (NODE->type != XML_TEXT_NODE) &&
                          (NODE->type != XML_CDATA_SECTION_NODE)));
            } else {
                DEBUG_VALID_MSG("element failed");
                ret = 0;
                break;
            }
            break;
        case XML_ELEMENT_CONTENT_OR:
            /*
             * Small optimization: when the first alternative is a plain
             * element which cannot match the current node, go directly
             * to the second alternative without saving a rollback state.
             */
            if (CONT->c1->type == XML_ELEMENT_CONTENT_ELEMENT) {
                if ((NODE == NULL) ||
                    (!xmlStrEqual(NODE->name, CONT->c1->name))) {
                    DEPTH++;
                    CONT = CONT->c2;
                    goto cont;
                }
                if ((NODE->ns == NULL) || (NODE->ns->prefix == NULL)) {
                    ret = (CONT->c1->prefix == NULL);
                } else if (CONT->c1->prefix == NULL) {
                    ret = 0;
                } else {
                    ret = xmlStrEqual(NODE->ns->prefix, CONT->c1->prefix);
                }
                if (ret == 0) {
                    DEPTH++;
                    CONT = CONT->c2;
                    goto cont;
                }
            }

            /*
             * save the second branch 'or' branch
             */
            DEBUG_VALID_MSG("saving 'or' branch");
            if (vstateVPush(ctxt, CONT->c2, NODE, (unsigned char)(DEPTH + 1),
                            OCCURS, ROLLBACK_OR) < 0)
                return(-1);
            DEPTH++;
            CONT = CONT->c1;
            goto cont;
        case XML_ELEMENT_CONTENT_SEQ:
            /*
             * Small optimization: when the first item is an optional
             * ('?' or '*') plain element which cannot match the current
             * node, skip directly to the second item of the sequence.
             */
            if ((CONT->c1->type == XML_ELEMENT_CONTENT_ELEMENT) &&
                ((CONT->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                 (CONT->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                if ((NODE == NULL) ||
                    (!xmlStrEqual(NODE->name, CONT->c1->name))) {
                    DEPTH++;
                    CONT = CONT->c2;
                    goto cont;
                }
                if ((NODE->ns == NULL) || (NODE->ns->prefix == NULL)) {
                    ret = (CONT->c1->prefix == NULL);
                } else if (CONT->c1->prefix == NULL) {
                    ret = 0;
                } else {
                    ret = xmlStrEqual(NODE->ns->prefix, CONT->c1->prefix);
                }
                if (ret == 0) {
                    DEPTH++;
                    CONT = CONT->c2;
                    goto cont;
                }
            }
            DEPTH++;
            CONT = CONT->c1;
            goto cont;
    }

    /*
     * At this point handle going up in the tree
     */
    if (ret == -1) {
        DEBUG_VALID_MSG("error found returning");
        return(ret);
    }
analyze:
    while (CONT != NULL) {
        /*
         * First do the analysis depending on the occurrence model at
         * this level.
         */
        if (ret == 0) {
            switch (CONT->ocur) {
                xmlNodePtr cur;

                case XML_ELEMENT_CONTENT_ONCE:
                    /* remember the node to detect whether the rollback moved it */
                    cur = ctxt->vstate->node;
                    DEBUG_VALID_MSG("Once branch failed, rollback");
                    if (vstateVPop(ctxt) < 0 ) {
                        DEBUG_VALID_MSG("exhaustion, failed");
                        return(0);
                    }
                    if (cur != ctxt->vstate->node)
                        determinist = -3;
                    goto cont;
                case XML_ELEMENT_CONTENT_PLUS:
                    if (OCCURRENCE == 0) {
                        cur = ctxt->vstate->node;
                        DEBUG_VALID_MSG("Plus branch failed, rollback");
                        if (vstateVPop(ctxt) < 0 ) {
                            DEBUG_VALID_MSG("exhaustion, failed");
                            return(0);
                        }
                        if (cur != ctxt->vstate->node)
                            determinist = -3;
                        goto cont;
                    }
                    DEBUG_VALID_MSG("Plus branch found");
                    ret = 1;
                    break;
                case XML_ELEMENT_CONTENT_MULT:
#ifdef DEBUG_VALID_ALGO
                    if (OCCURRENCE == 0) {
                        DEBUG_VALID_MSG("Mult branch failed");
                    } else {
                        DEBUG_VALID_MSG("Mult branch found");
                    }
#endif
                    ret = 1;
                    break;
                case XML_ELEMENT_CONTENT_OPT:
                    DEBUG_VALID_MSG("Option branch failed");
                    ret = 1;
                    break;
            }
        } else {
            switch (CONT->ocur) {
                case XML_ELEMENT_CONTENT_OPT:
                    DEBUG_VALID_MSG("Option branch succeeded");
                    ret = 1;
                    break;
                case XML_ELEMENT_CONTENT_ONCE:
                    DEBUG_VALID_MSG("Once branch succeeded");
                    ret = 1;
                    break;
                case XML_ELEMENT_CONTENT_PLUS:
                    if (STATE == ROLLBACK_PARENT) {
                        DEBUG_VALID_MSG("Plus branch rollback");
                        ret = 1;
                        break;
                    }
                    if (NODE == NULL) {
                        DEBUG_VALID_MSG("Plus branch exhausted");
                        ret = 1;
                        break;
                    }
                    DEBUG_VALID_MSG("Plus branch succeeded, continuing");
                    SET_OCCURRENCE;
                    goto cont;
                case XML_ELEMENT_CONTENT_MULT:
                    if (STATE == ROLLBACK_PARENT) {
                        DEBUG_VALID_MSG("Mult branch rollback");
                        ret = 1;
                        break;
                    }
                    if (NODE == NULL) {
                        DEBUG_VALID_MSG("Mult branch exhausted");
                        ret = 1;
                        break;
                    }
                    DEBUG_VALID_MSG("Mult branch succeeded, continuing");
                    /* SET_OCCURRENCE; */
                    goto cont;
            }
        }
        STATE = 0;

        /*
         * Then act accordingly at the parent level
         */
        RESET_OCCURRENCE;
        /* reached the root of the content model, leave the analysis loop */
        if (CONT->parent == NULL)
            break;

        switch
(CONT->parent->type) { + case XML_ELEMENT_CONTENT_PCDATA: + DEBUG_VALID_MSG("Error: parent pcdata"); + return(-1); + case XML_ELEMENT_CONTENT_ELEMENT: + DEBUG_VALID_MSG("Error: parent element"); + return(-1); + case XML_ELEMENT_CONTENT_OR: + if (ret == 1) { + DEBUG_VALID_MSG("Or succeeded"); + CONT = CONT->parent; + DEPTH--; + } else { + DEBUG_VALID_MSG("Or failed"); + CONT = CONT->parent; + DEPTH--; + } + break; + case XML_ELEMENT_CONTENT_SEQ: + if (ret == 0) { + DEBUG_VALID_MSG("Sequence failed"); + CONT = CONT->parent; + DEPTH--; + } else if (CONT == CONT->parent->c1) { + DEBUG_VALID_MSG("Sequence testing 2nd branch"); + CONT = CONT->parent->c2; + goto cont; + } else { + DEBUG_VALID_MSG("Sequence succeeded"); + CONT = CONT->parent; + DEPTH--; + } + } + } + if (NODE != NULL) { + xmlNodePtr cur; + + cur = ctxt->vstate->node; + DEBUG_VALID_MSG("Failed, remaining input, rollback"); + if (vstateVPop(ctxt) < 0 ) { + DEBUG_VALID_MSG("exhaustion, failed"); + return(0); + } + if (cur != ctxt->vstate->node) + determinist = -3; + goto cont; + } + if (ret == 0) { + xmlNodePtr cur; + + cur = ctxt->vstate->node; + DEBUG_VALID_MSG("Failure, rollback"); + if (vstateVPop(ctxt) < 0 ) { + DEBUG_VALID_MSG("exhaustion, failed"); + return(0); + } + if (cur != ctxt->vstate->node) + determinist = -3; + goto cont; + } + return(determinist); +} +#endif + +/** + * xmlSnprintfElements: + * @buf: an output buffer + * @size: the size of the buffer + * @content: An element + * @glob: 1 if one must print the englobing parenthesis, 0 otherwise + * + * This will dump the list of elements to the buffer + * Intended just for the debug routine + */ +static void +xmlSnprintfElements(char *buf, int size, xmlNodePtr node, int glob) { + xmlNodePtr cur; + int len; + + if (node == NULL) return; + if (glob) strcat(buf, "("); + cur = node; + while (cur != NULL) { + len = strlen(buf); + if (size - len < 50) { + if ((size - len > 4) && (buf[len - 1] != '.')) + strcat(buf, " ..."); + return; + } + switch 
(cur->type) { + case XML_ELEMENT_NODE: + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + if (size - len < xmlStrlen(cur->ns->prefix) + 10) { + if ((size - len > 4) && (buf[len - 1] != '.')) + strcat(buf, " ..."); + return; + } + strcat(buf, (char *) cur->ns->prefix); + strcat(buf, ":"); + } + if (size - len < xmlStrlen(cur->name) + 10) { + if ((size - len > 4) && (buf[len - 1] != '.')) + strcat(buf, " ..."); + return; + } + strcat(buf, (char *) cur->name); + if (cur->next != NULL) + strcat(buf, " "); + break; + case XML_TEXT_NODE: + if (xmlIsBlankNode(cur)) + break; + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + strcat(buf, "CDATA"); + if (cur->next != NULL) + strcat(buf, " "); + break; + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_NAMESPACE_DECL: + strcat(buf, "???"); + if (cur->next != NULL) + strcat(buf, " "); + break; + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_DTD_NODE: + case XML_COMMENT_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + } + cur = cur->next; + } + if (glob) strcat(buf, ")"); +} + +/** + * xmlValidateElementContent: + * @ctxt: the validation context + * @child: the child list + * @elemDecl: pointer to the element declaration + * @warn: emit the error message + * @parent: the parent element (for error reporting) + * + * Try to validate the content model of an element + * + * returns 1 if valid or 0 if not and -1 in case of error + */ + +static int +xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child, + xmlElementPtr elemDecl, int warn, xmlNodePtr parent) { + int ret = 1; +#ifndef LIBXML_REGEXP_ENABLED + xmlNodePtr repl = NULL, last = NULL, tmp; +#endif + xmlNodePtr cur; + xmlElementContentPtr cont; + const 
xmlChar *name; + + if (elemDecl == NULL) + return(-1); + cont = elemDecl->content; + name = elemDecl->name; + +#ifdef LIBXML_REGEXP_ENABLED + /* Build the regexp associated to the content model */ + if (elemDecl->contModel == NULL) + ret = xmlValidBuildContentModel(ctxt, elemDecl); + if (elemDecl->contModel == NULL) { + ret = -1; + } else { + xmlRegExecCtxtPtr exec; + + ctxt->nodeMax = 0; + ctxt->nodeNr = 0; + ctxt->nodeTab = NULL; + exec = xmlRegNewExecCtxt(elemDecl->contModel, NULL, NULL); + if (exec != NULL) { + cur = child; + while (cur != NULL) { + switch (cur->type) { + case XML_ENTITY_REF_NODE: + /* + * Push the current node to be able to roll back + * and process within the entity + */ + if ((cur->children != NULL) && + (cur->children->children != NULL)) { + nodeVPush(ctxt, cur); + cur = cur->children->children; + continue; + } + break; + case XML_TEXT_NODE: + if (xmlIsBlankNode(cur)) + break; + ret = 0; + goto fail; + case XML_CDATA_SECTION_NODE: + TODO + ret = 0; + goto fail; + case XML_ELEMENT_NODE: + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlChar *QName; + int len; + + len = xmlStrlen(cur->name) + + xmlStrlen(cur->ns->prefix) + 2; + QName = xmlMalloc(len); + if (QName == NULL) { + ret = -1; + goto fail; + } + snprintf((char *) QName, len, "%s:%s", + (char *)cur->ns->prefix, + (char *)cur->name); + ret = xmlRegExecPushString(exec, QName, NULL); + xmlFree(QName); + } else { + ret = xmlRegExecPushString(exec, cur->name, NULL); + } + break; + default: + break; + } + /* + * Switch to next element + */ + cur = cur->next; + while (cur == NULL) { + cur = nodeVPop(ctxt); + if (cur == NULL) + break; + cur = cur->next; + } + } + ret = xmlRegExecPushString(exec, NULL, NULL); +fail: + xmlRegFreeExecCtxt(exec); + } + } +#else /* LIBXML_REGEXP_ENABLED */ + /* + * Allocate the stack + */ + ctxt->vstateMax = 8; + ctxt->vstateTab = (xmlValidState *) xmlMalloc( + ctxt->vstateMax * sizeof(ctxt->vstateTab[0])); + if (ctxt->vstateTab == NULL) { + 
xmlGenericError(xmlGenericErrorContext, + "malloc failed !n"); + return(-1); + } + /* + * The first entry in the stack is reserved to the current state + */ + ctxt->nodeMax = 0; + ctxt->nodeNr = 0; + ctxt->nodeTab = NULL; + ctxt->vstate = &ctxt->vstateTab[0]; + ctxt->vstateNr = 1; + CONT = cont; + NODE = child; + DEPTH = 0; + OCCURS = 0; + STATE = 0; + ret = xmlValidateElementType(ctxt); + if ((ret == -3) && (warn)) { + VWARNING(ctxt->userData, + "Content model for Element %s is ambiguous\n", name); + } else if (ret == -2) { + /* + * An entities reference appeared at this level. + * Buid a minimal representation of this node content + * sufficient to run the validation process on it + */ + DEBUG_VALID_MSG("Found an entity reference, linearizing"); + cur = child; + while (cur != NULL) { + switch (cur->type) { + case XML_ENTITY_REF_NODE: + /* + * Push the current node to be able to roll back + * and process within the entity + */ + if ((cur->children != NULL) && + (cur->children->children != NULL)) { + nodeVPush(ctxt, cur); + cur = cur->children->children; + continue; + } + break; + case XML_TEXT_NODE: + if (xmlIsBlankNode(cur)) + break; + /* no break on purpose */ + case XML_CDATA_SECTION_NODE: + /* no break on purpose */ + case XML_ELEMENT_NODE: + /* + * Allocate a new node and minimally fills in + * what's required + */ + tmp = (xmlNodePtr) xmlMalloc(sizeof(xmlNode)); + if (tmp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlValidateElementContent : malloc failed\n"); + xmlFreeNodeList(repl); + ret = -1; + goto done; + } + tmp->type = cur->type; + tmp->name = cur->name; + tmp->ns = cur->ns; + tmp->next = NULL; + tmp->content = NULL; + if (repl == NULL) + repl = last = tmp; + else { + last->next = tmp; + last = tmp; + } + if (cur->type == XML_CDATA_SECTION_NODE) { + /* + * E59 spaces in CDATA does not match the + * nonterminal S + */ + tmp->content = xmlStrdup(BAD_CAST "CDATA"); + } + break; + default: + break; + } + /* + * Switch to next element + */ + 
cur = cur->next; + while (cur == NULL) { + cur = nodeVPop(ctxt); + if (cur == NULL) + break; + cur = cur->next; + } + } + + /* + * Relaunch the validation + */ + ctxt->vstate = &ctxt->vstateTab[0]; + ctxt->vstateNr = 1; + CONT = cont; + NODE = repl; + DEPTH = 0; + OCCURS = 0; + STATE = 0; + ret = xmlValidateElementType(ctxt); + } +#endif /* LIBXML_REGEXP_ENABLED */ + if ((warn) && ((ret != 1) && (ret != -3))) { + if ((ctxt != NULL) && (ctxt->warning != NULL)) { + char expr[5000]; + char list[5000]; + + expr[0] = 0; + xmlSnprintfElementContent(expr, 5000, cont, 1); + list[0] = 0; +#ifndef LIBXML_REGEXP_ENABLED + if (repl != NULL) + xmlSnprintfElements(list, 5000, repl, 1); + else +#endif /* LIBXML_REGEXP_ENABLED */ + xmlSnprintfElements(list, 5000, child, 1); + + if (name != NULL) { + if (parent != NULL) VECTXT(ctxt, parent); + VERROR(ctxt->userData, + "Element %s content does not follow the DTD\nExpecting %s, got %s\n", + name, expr, list); + } else { + if (parent != NULL) VECTXT(ctxt, parent); + VERROR(ctxt->userData, + "Element content does not follow the DTD\nExpecting %s, got %s\n", + expr, list); + } + } else { + if (name != NULL) { + if (parent != NULL) VECTXT(ctxt, parent); + VERROR(ctxt->userData, + "Element %s content does not follow the DTD\n", + name); + } else { + if (parent != NULL) VECTXT(ctxt, parent); + VERROR(ctxt->userData, + "Element content does not follow the DTD\n"); + } + } + ret = 0; + } + if (ret == -3) + ret = 1; + +#ifndef LIBXML_REGEXP_ENABLED +done: + /* + * Deallocate the copy if done, and free up the validation stack + */ + while (repl != NULL) { + tmp = repl->next; + xmlFree(repl); + repl = tmp; + } + ctxt->vstateMax = 0; + if (ctxt->vstateTab != NULL) { + xmlFree(ctxt->vstateTab); + ctxt->vstateTab = NULL; + } +#endif + ctxt->nodeMax = 0; + ctxt->nodeNr = 0; + if (ctxt->nodeTab != NULL) { + xmlFree(ctxt->nodeTab); + ctxt->nodeTab = NULL; + } + return(ret); + +} + +/** + * xmlValidateCdataElement: + * @ctxt: the validation context + 
* @doc: a document instance + * @elem: an element instance + * + * Check that an element follows #CDATA + * + * returns 1 if valid or 0 otherwise + */ +static int +xmlValidateOneCdataElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem) { + int ret = 1; + xmlNodePtr cur, child; + + if ((ctxt == NULL) || (doc == NULL) || (elem == NULL)) + return(0); + + child = elem->children; + + cur = child; + while (cur != NULL) { + switch (cur->type) { + case XML_ENTITY_REF_NODE: + /* + * Push the current node to be able to roll back + * and process within the entity + */ + if ((cur->children != NULL) && + (cur->children->children != NULL)) { + nodeVPush(ctxt, cur); + cur = cur->children->children; + continue; + } + break; + case XML_COMMENT_NODE: + case XML_PI_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + break; + default: + ret = 0; + goto done; + } + /* + * Switch to next element + */ + cur = cur->next; + while (cur == NULL) { + cur = nodeVPop(ctxt); + if (cur == NULL) + break; + cur = cur->next; + } + } +done: + ctxt->nodeMax = 0; + ctxt->nodeNr = 0; + if (ctxt->nodeTab != NULL) { + xmlFree(ctxt->nodeTab); + ctxt->nodeTab = NULL; + } + return(ret); +} + +/** + * xmlValidateOneElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * + * Try to validate a single element and it's attributes, + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Element Valid ] + * - [ VC: Required Attribute ] + * Then call xmlValidateOneAttribute() for each attribute present. 
+ * + * The ID/IDREF checkings are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem) { + xmlElementPtr elemDecl = NULL; + xmlElementContentPtr cont; + xmlAttributePtr attr; + xmlNodePtr child; + int ret = 1, tmp; + const xmlChar *name; + const xmlChar *prefix = NULL; + int extsubset = 0; + + CHECK_DTD; + + if (elem == NULL) return(0); + switch (elem->type) { + case XML_ATTRIBUTE_NODE: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Attribute element not expected here\n"); + return(0); + case XML_TEXT_NODE: + if (elem->children != NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, "Text element has childs !\n"); + return(0); + } + if (elem->properties != NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, "Text element has attributes !\n"); + return(0); + } + if (elem->ns != NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, "Text element has namespace !\n"); + return(0); + } + if (elem->nsDef != NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Text element carries namespace definitions !\n"); + return(0); + } + if (elem->content == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Text element has no content !\n"); + return(0); + } + return(1); + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(1); + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + return(1); + case XML_ENTITY_NODE: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Entity element not expected here\n"); + return(0); + case XML_NOTATION_NODE: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Notation element not expected here\n"); + return(0); + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Document element not expected here\n"); + return(0); + case XML_HTML_DOCUMENT_NODE: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + 
"\n"); + return(0); + case XML_ELEMENT_NODE: + break; + default: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "unknown element type %d\n", elem->type); + return(0); + } + if (elem->name == NULL) return(0); + + /* + * Fetch the declaration for the qualified name + */ + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) + prefix = elem->ns->prefix; + + if (prefix != NULL) { + elemDecl = xmlGetDtdQElementDesc(doc->intSubset, + elem->name, prefix); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) { + elemDecl = xmlGetDtdQElementDesc(doc->extSubset, + elem->name, prefix); + if (elemDecl != NULL) + extsubset = 1; + } + } + + /* + * Fetch the declaration for the non qualified name + * This is "non-strict" validation should be done on the + * full QName but in that case being flexible makes sense. + */ + if (elemDecl == NULL) { + elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) { + elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name); + if (elemDecl != NULL) + extsubset = 1; + } + } + if (elemDecl == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, "No declaration for element %s\n", + elem->name); + return(0); + } + + /* Check that the element content matches the definition */ + switch (elemDecl->etype) { + case XML_ELEMENT_TYPE_UNDEFINED: + VECTXT(ctxt, elem); + VERROR(ctxt->userData, "No declaration for element %s\n", + elem->name); + return(0); + case XML_ELEMENT_TYPE_EMPTY: + if (elem->children != NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s was declared EMPTY this one has content\n", + elem->name); + ret = 0; + } + break; + case XML_ELEMENT_TYPE_ANY: + /* I don't think anything is required then */ + break; + case XML_ELEMENT_TYPE_MIXED: + + /* simple case of declared as #PCDATA */ + if ((elemDecl->content != NULL) && + (elemDecl->content->type == XML_ELEMENT_CONTENT_PCDATA)) { + ret = xmlValidateOneCdataElement(ctxt, doc, elem); + if (!ret) { + 
VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s was declared #PCDATA but contains non text nodes\n", + elem->name); + } + break; + } + child = elem->children; + /* Hum, this start to get messy */ + while (child != NULL) { + if (child->type == XML_ELEMENT_NODE) { + name = child->name; + if ((child->ns != NULL) && (child->ns->prefix != NULL)) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + child->ns->prefix, child->name); + qname[sizeof(qname) - 1] = 0; + cont = elemDecl->content; + while (cont != NULL) { + if (cont->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(cont->name, qname)) break; + } else if ((cont->type == XML_ELEMENT_CONTENT_OR) && + (cont->c1 != NULL) && + (cont->c1->type == XML_ELEMENT_CONTENT_ELEMENT)){ + if (xmlStrEqual(cont->c1->name, qname)) break; + } else if ((cont->type != XML_ELEMENT_CONTENT_OR) || + (cont->c1 == NULL) || + (cont->c1->type != XML_ELEMENT_CONTENT_PCDATA)){ + /* Internal error !!! */ + xmlGenericError(xmlGenericErrorContext, + "Internal: MIXED struct bad\n"); + break; + } + cont = cont->c2; + } + if (cont != NULL) + goto child_ok; + } + cont = elemDecl->content; + while (cont != NULL) { + if (cont->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(cont->name, name)) break; + } else if ((cont->type == XML_ELEMENT_CONTENT_OR) && + (cont->c1 != NULL) && + (cont->c1->type == XML_ELEMENT_CONTENT_ELEMENT)) { + if (xmlStrEqual(cont->c1->name, name)) break; + } else if ((cont->type != XML_ELEMENT_CONTENT_OR) || + (cont->c1 == NULL) || + (cont->c1->type != XML_ELEMENT_CONTENT_PCDATA)) { + /* Internal error !!! 
*/ + xmlGenericError(xmlGenericErrorContext, + "Internal: MIXED struct bad\n"); + break; + } + cont = cont->c2; + } + if (cont == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s is not declared in %s list of possible children\n", + name, elem->name); + ret = 0; + } + } +child_ok: + child = child->next; + } + break; + case XML_ELEMENT_TYPE_ELEMENT: + if ((doc->standalone == 1) && (extsubset == 1)) { + /* + * VC: Standalone Document Declaration + * - element types with element content, if white space + * occurs directly within any instance of those types. + */ + child = elem->children; + while (child != NULL) { + if (child->type == XML_TEXT_NODE) { + const xmlChar *content = child->content; + + while (IS_BLANK(*content)) + content++; + if (*content == 0) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, +"standalone: %s declared in the external subset contains white spaces nodes\n", + elem->name); + ret = 0; + break; + } + } + child =child->next; + } + } + child = elem->children; + cont = elemDecl->content; + tmp = xmlValidateElementContent(ctxt, child, elemDecl, 1, elem); + if (tmp <= 0) + ret = tmp; + break; + } + + /* [ VC: Required Attribute ] */ + attr = elemDecl->attributes; + while (attr != NULL) { + if (attr->def == XML_ATTRIBUTE_REQUIRED) { + int qualified = -1; + + if ((attr->prefix == NULL) && + (xmlStrEqual(attr->name, BAD_CAST "xmlns"))) { + xmlNsPtr ns; + + ns = elem->nsDef; + while (ns != NULL) { + if (ns->prefix == NULL) + goto found; + ns = ns->next; + } + } else if (xmlStrEqual(attr->prefix, BAD_CAST "xmlns")) { + xmlNsPtr ns; + + ns = elem->nsDef; + while (ns != NULL) { + if (xmlStrEqual(attr->name, ns->prefix)) + goto found; + ns = ns->next; + } + } else { + xmlAttrPtr attrib; + + attrib = elem->properties; + while (attrib != NULL) { + if (xmlStrEqual(attrib->name, attr->name)) { + if (attr->prefix != NULL) { + xmlNsPtr nameSpace = attrib->ns; + + if (nameSpace == NULL) + nameSpace = elem->ns; + /* + * qualified names handling 
is problematic, having a + * different prefix should be possible but DTDs don't + * allow to define the URI instead of the prefix :-( + */ + if (nameSpace == NULL) { + if (qualified < 0) + qualified = 0; + } else if (!xmlStrEqual(nameSpace->prefix, + attr->prefix)) { + if (qualified < 1) + qualified = 1; + } else + goto found; + } else { + /* + * We should allow applications to define namespaces + * for their application even if the DTD doesn't + * carry one, otherwise, basically we would always + * break. + */ + goto found; + } + } + attrib = attrib->next; + } + } + if (qualified == -1) { + if (attr->prefix == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s does not carry attribute %s\n", + elem->name, attr->name); + ret = 0; + } else { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s does not carry attribute %s:%s\n", + elem->name, attr->prefix,attr->name); + ret = 0; + } + } else if (qualified == 0) { + VWARNING(ctxt->userData, + "Element %s required attribute %s:%s has no prefix\n", + elem->name, attr->prefix,attr->name); + } else if (qualified == 1) { + VWARNING(ctxt->userData, + "Element %s required attribute %s:%s has different prefix\n", + elem->name, attr->prefix,attr->name); + } + } else if (attr->def == XML_ATTRIBUTE_FIXED) { + /* + * Special tests checking #FIXED namespace declarations + * have the right value since this is not done as an + * attribute checking + */ + if ((attr->prefix == NULL) && + (xmlStrEqual(attr->name, BAD_CAST "xmlns"))) { + xmlNsPtr ns; + + ns = elem->nsDef; + while (ns != NULL) { + if (ns->prefix == NULL) { + if (!xmlStrEqual(attr->defaultValue, ns->href)) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s namespace name for default namespace does not match the DTD\n", + elem->name); + ret = 0; + } + goto found; + } + ns = ns->next; + } + } else if (xmlStrEqual(attr->prefix, BAD_CAST "xmlns")) { + xmlNsPtr ns; + + ns = elem->nsDef; + while (ns != NULL) { + if (xmlStrEqual(attr->name, 
ns->prefix)) { + if (!xmlStrEqual(attr->defaultValue, ns->href)) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, + "Element %s namespace name for %s does not match the DTD\n", + elem->name, ns->prefix); + ret = 0; + } + goto found; + } + ns = ns->next; + } + } + } +found: + attr = attr->nexth; + } + return(ret); +} + +/** + * xmlValidateRoot: + * @ctxt: the validation context + * @doc: a document instance + * + * Try to validate a the root element + * basically it does the following check as described by the + * XML-1.0 recommendation: + * - [ VC: Root Element Type ] + * it doesn't try to recurse or apply other check to the element + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateRoot(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlNodePtr root; + if (doc == NULL) return(0); + + root = xmlDocGetRootElement(doc); + if ((root == NULL) || (root->name == NULL)) { + VERROR(ctxt->userData, "Not valid: no root element\n"); + return(0); + } + + /* + * When doing post validation against a separate DTD, those may + * no internal subset has been generated + */ + if ((doc->intSubset != NULL) && + (doc->intSubset->name != NULL)) { + /* + * Check first the document root against the NQName + */ + if (!xmlStrEqual(doc->intSubset->name, root->name)) { + if ((root->ns != NULL) && (root->ns->prefix != NULL)) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + root->ns->prefix, root->name); + qname[sizeof(qname) - 1] = 0; + if (xmlStrEqual(doc->intSubset->name, qname)) + goto name_ok; + } + if ((xmlStrEqual(doc->intSubset->name, BAD_CAST "HTML")) && + (xmlStrEqual(root->name, BAD_CAST "html"))) + goto name_ok; + VECTXT(ctxt, root); + VERROR(ctxt->userData, + "Not valid: root and DTD name do not match '%s' and '%s'\n", + root->name, doc->intSubset->name); + return(0); + + } + } +name_ok: + return(1); +} + + +/** + * xmlValidateElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * + * Try to 
validate the subtree under an element + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem) { + xmlNodePtr child; + xmlAttrPtr attr; + xmlChar *value; + int ret = 1; + + if (elem == NULL) return(0); + + /* + * XInclude elements were added after parsing in the infoset, + * they don't really mean anything validation wise. + */ + if ((elem->type == XML_XINCLUDE_START) || + (elem->type == XML_XINCLUDE_END)) + return(1); + + CHECK_DTD; + + /* + * Entities references have to be handled separately + */ + if (elem->type == XML_ENTITY_REF_NODE) { + return(1); + } + + ret &= xmlValidateOneElement(ctxt, doc, elem); + attr = elem->properties; + while(attr != NULL) { + value = xmlNodeListGetString(doc, attr->children, 0); + ret &= xmlValidateOneAttribute(ctxt, doc, elem, attr, value); + if (value != NULL) + xmlFree(value); + attr= attr->next; + } + child = elem->children; + while (child != NULL) { + ret &= xmlValidateElement(ctxt, doc, child); + child = child->next; + } + + return(ret); +} + +/** + * xmlValidateRef: + * @ref: A reference to be validated + * @ctxt: Validation context + * @name: Name of ID we are searching for + * + */ +static void +xmlValidateRef(xmlRefPtr ref, xmlValidCtxtPtr ctxt, + const xmlChar *name) { + xmlAttrPtr id; + xmlAttrPtr attr; + + if (ref == NULL) + return; + attr = ref->attr; + if (attr == NULL) + return; + if (attr->atype == XML_ATTRIBUTE_IDREF) { + id = xmlGetID(ctxt->doc, name); + if (id == NULL) { + VECTXT(ctxt, attr->parent); + VERROR(ctxt->userData, + "IDREF attribute %s references an unknown ID \"%s\"\n", + attr->name, name); + ctxt->valid = 0; + } + } else if (attr->atype == XML_ATTRIBUTE_IDREFS) { + xmlChar *dup, *str = NULL, *cur, save; + + dup = xmlStrdup(name); + if (dup == NULL) { + ctxt->valid = 0; + return; + } + cur = dup; + while (*cur != 0) { + str = cur; + while ((*cur != 0) && (!IS_BLANK(*cur))) cur++; + save = *cur; + *cur = 0; + id = 
xmlGetID(ctxt->doc, str); + if (id == NULL) { + VECTXT(ctxt, attr->parent); + VERROR(ctxt->userData, + "IDREFS attribute %s references an unknown ID \"%s\"\n", + attr->name, str); + ctxt->valid = 0; + } + if (save == 0) + break; + *cur = save; + while (IS_BLANK(*cur)) cur++; + } + xmlFree(dup); + } +} + +/** + * xmlWalkValidateList: + * @data: Contents of current link + * @user: Value supplied by the user + * + * Returns 0 to abort the walk or 1 to continue + */ +static int +xmlWalkValidateList(const void *data, const void *user) +{ + xmlValidateMemoPtr memo = (xmlValidateMemoPtr)user; + xmlValidateRef((xmlRefPtr)data, memo->ctxt, memo->name); + return 1; +} + +/** + * xmlValidateCheckRefCallback: + * @ref_list: List of references + * @ctxt: Validation context + * @name: Name of ID we are searching for + * + */ +static void +xmlValidateCheckRefCallback(xmlListPtr ref_list, xmlValidCtxtPtr ctxt, + const xmlChar *name) { + xmlValidateMemo memo; + + if (ref_list == NULL) + return; + memo.ctxt = ctxt; + memo.name = name; + + xmlListWalk(ref_list, xmlWalkValidateList, &memo); + +} + +/** + * xmlValidateDocumentFinal: + * @ctxt: the validation context + * @doc: a document instance + * + * Does the final step for the document validation once all the + * incremental validation steps have been completed + * + * basically it does the following checks described by the XML Rec + * + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlRefTablePtr table; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlValidateDocumentFinal: doc == NULL\n"); + return(0); + } + + /* + * Check all the NOTATION/NOTATIONS attributes + */ + /* + * Check all the ENTITY/ENTITIES attributes definition for validity + */ + /* + * Check all the IDREF/IDREFS attributes definition for validity + */ + table = (xmlRefTablePtr) doc->refs; + ctxt->doc = doc; + ctxt->valid = 1; + xmlHashScan(table, (xmlHashScanner) 
xmlValidateCheckRefCallback, ctxt); + return(ctxt->valid); +} + +/** + * xmlValidateDtd: + * @ctxt: the validation context + * @doc: a document instance + * @dtd: a dtd instance + * + * Try to validate the document against the dtd instance + * + * basically it does check all the definitions in the DtD. + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDtd(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlDtdPtr dtd) { + int ret; + xmlDtdPtr oldExt; + xmlNodePtr root; + + if (dtd == NULL) return(0); + if (doc == NULL) return(0); + oldExt = doc->extSubset; + doc->extSubset = dtd; + ret = xmlValidateRoot(ctxt, doc); + if (ret == 0) { + doc->extSubset = oldExt; + return(ret); + } + if (doc->ids != NULL) { + xmlFreeIDTable(doc->ids); + doc->ids = NULL; + } + if (doc->refs != NULL) { + xmlFreeRefTable(doc->refs); + doc->refs = NULL; + } + root = xmlDocGetRootElement(doc); + ret = xmlValidateElement(ctxt, doc, root); + ret &= xmlValidateDocumentFinal(ctxt, doc); + doc->extSubset = oldExt; + return(ret); +} + +static void +xmlValidateNotationCallback(xmlEntityPtr cur, xmlValidCtxtPtr ctxt, + const xmlChar *name ATTRIBUTE_UNUSED) { + if (cur == NULL) + return; + if (cur->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + xmlChar *notation = cur->content; + + if (notation != NULL) { + int ret; + + ret = xmlValidateNotationUse(ctxt, cur->doc, notation); + if (ret != 1) { + ctxt->valid = 0; + } + } + } +} + +static void +xmlValidateAttributeCallback(xmlAttributePtr cur, xmlValidCtxtPtr ctxt, + const xmlChar *name ATTRIBUTE_UNUSED) { + int ret; + xmlDocPtr doc; + xmlElementPtr elem; + + if (cur == NULL) + return; + switch (cur->atype) { + case XML_ATTRIBUTE_CDATA: + case XML_ATTRIBUTE_ID: + case XML_ATTRIBUTE_IDREF : + case XML_ATTRIBUTE_IDREFS: + case XML_ATTRIBUTE_NMTOKEN: + case XML_ATTRIBUTE_NMTOKENS: + case XML_ATTRIBUTE_ENUMERATION: + break; + case XML_ATTRIBUTE_ENTITY: + case XML_ATTRIBUTE_ENTITIES: + case XML_ATTRIBUTE_NOTATION: + if (cur->defaultValue != NULL) { 
+ + ret = xmlValidateAttributeValue2(ctxt, ctxt->doc, cur->name, + cur->atype, cur->defaultValue); + if ((ret == 0) && (ctxt->valid == 1)) + ctxt->valid = 0; + } + if (cur->tree != NULL) { + xmlEnumerationPtr tree = cur->tree; + while (tree != NULL) { + ret = xmlValidateAttributeValue2(ctxt, ctxt->doc, + cur->name, cur->atype, tree->name); + if ((ret == 0) && (ctxt->valid == 1)) + ctxt->valid = 0; + tree = tree->next; + } + } + } + if (cur->atype == XML_ATTRIBUTE_NOTATION) { + doc = cur->doc; + if ((doc == NULL) || (cur->elem == NULL)) { + VERROR(ctxt->userData, + "xmlValidateAttributeCallback(%s): internal error\n", + cur->name); + return; + } + elem = xmlGetDtdElementDesc(doc->intSubset, cur->elem); + if (elem == NULL) + elem = xmlGetDtdElementDesc(doc->extSubset, cur->elem); + if (elem == NULL) { + VERROR(ctxt->userData, + "attribute %s: could not find decl for element %s\n", + cur->name, cur->elem); + return; + } + if (elem->etype == XML_ELEMENT_TYPE_EMPTY) { + VERROR(ctxt->userData, + "NOTATION attribute %s declared for EMPTY element %s\n", + cur->name, cur->elem); + ctxt->valid = 0; + } + } +} + +/** + * xmlValidateDtdFinal: + * @ctxt: the validation context + * @doc: a document instance + * + * Does the final step for the dtds validation once all the + * subsets have been parsed + * + * basically it does the following checks described by the XML Rec + * - check that ENTITY and ENTITIES type attributes default or + * possible values matches one of the defined entities. + * - check that NOTATION type attributes default or + * possible values matches one of the defined notations. 
+ * + * returns 1 if valid or 0 if invalid and -1 if not well-formed + */ + +int +xmlValidateDtdFinal(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlDtdPtr dtd; + xmlAttributeTablePtr table; + xmlEntitiesTablePtr entities; + + if (doc == NULL) return(0); + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) + return(0); + ctxt->doc = doc; + ctxt->valid = 1; + dtd = doc->intSubset; + if ((dtd != NULL) && (dtd->attributes != NULL)) { + table = (xmlAttributeTablePtr) dtd->attributes; + xmlHashScan(table, (xmlHashScanner) xmlValidateAttributeCallback, ctxt); + } + if ((dtd != NULL) && (dtd->entities != NULL)) { + entities = (xmlEntitiesTablePtr) dtd->entities; + xmlHashScan(entities, (xmlHashScanner) xmlValidateNotationCallback, + ctxt); + } + dtd = doc->extSubset; + if ((dtd != NULL) && (dtd->attributes != NULL)) { + table = (xmlAttributeTablePtr) dtd->attributes; + xmlHashScan(table, (xmlHashScanner) xmlValidateAttributeCallback, ctxt); + } + if ((dtd != NULL) && (dtd->entities != NULL)) { + entities = (xmlEntitiesTablePtr) dtd->entities; + xmlHashScan(entities, (xmlHashScanner) xmlValidateNotationCallback, + ctxt); + } + return(ctxt->valid); +} + +/** + * xmlValidateDocument: + * @ctxt: the validation context + * @doc: a document instance + * + * Try to validate the document instance + * + * basically it does the all the checks described by the XML Rec + * i.e. validates the internal and external subset (if present) + * and validate the document tree. 
+ * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDocument(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + int ret; + xmlNodePtr root; + + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) { + VERROR(ctxt->userData, "no DTD found!\n" ); + return(0); + } + if ((doc->intSubset != NULL) && ((doc->intSubset->SystemID != NULL) || + (doc->intSubset->ExternalID != NULL)) && (doc->extSubset == NULL)) { + doc->extSubset = xmlParseDTD(doc->intSubset->ExternalID, + doc->intSubset->SystemID); + if (doc->extSubset == NULL) { + if (doc->intSubset->SystemID != NULL) { + VERROR(ctxt->userData, + "Could not load the external subset \"%s\"\n", + doc->intSubset->SystemID); + } else { + VERROR(ctxt->userData, + "Could not load the external subset \"%s\"\n", + doc->intSubset->ExternalID); + } + return(0); + } + } + + if (doc->ids != NULL) { + xmlFreeIDTable(doc->ids); + doc->ids = NULL; + } + if (doc->refs != NULL) { + xmlFreeRefTable(doc->refs); + doc->refs = NULL; + } + ret = xmlValidateDtdFinal(ctxt, doc); + if (!xmlValidateRoot(ctxt, doc)) return(0); + + root = xmlDocGetRootElement(doc); + ret &= xmlValidateElement(ctxt, doc, root); + ret &= xmlValidateDocumentFinal(ctxt, doc); + return(ret); +} + + +/************************************************************************ + * * + * Routines for dynamic validation editing * + * * + ************************************************************************/ + +/** + * xmlValidGetPotentialChildren: + * @ctree: an element content tree + * @list: an array to store the list of child names + * @len: a pointer to the number of element in the list + * @max: the size of the array + * + * Build/extend a list of potential children allowed by the content tree + * + * returns the number of element in the list, or -1 in case of error. 
+ */ + +int +xmlValidGetPotentialChildren(xmlElementContent *ctree, const xmlChar **list, + int *len, int max) { + int i; + + if ((ctree == NULL) || (list == NULL) || (len == NULL)) + return(-1); + if (*len >= max) return(*len); + + switch (ctree->type) { + case XML_ELEMENT_CONTENT_PCDATA: + for (i = 0; i < *len;i++) + if (xmlStrEqual(BAD_CAST "#PCDATA", list[i])) return(*len); + list[(*len)++] = BAD_CAST "#PCDATA"; + break; + case XML_ELEMENT_CONTENT_ELEMENT: + for (i = 0; i < *len;i++) + if (xmlStrEqual(ctree->name, list[i])) return(*len); + list[(*len)++] = ctree->name; + break; + case XML_ELEMENT_CONTENT_SEQ: + xmlValidGetPotentialChildren(ctree->c1, list, len, max); + xmlValidGetPotentialChildren(ctree->c2, list, len, max); + break; + case XML_ELEMENT_CONTENT_OR: + xmlValidGetPotentialChildren(ctree->c1, list, len, max); + xmlValidGetPotentialChildren(ctree->c2, list, len, max); + break; + } + + return(*len); +} + +/** + * xmlValidGetValidElements: + * @prev: an element to insert after + * @next: an element to insert next + * @list: an array to store the list of child names + * @max: the size of the array + * + * This function returns the list of authorized children to insert + * within an existing tree while respecting the validity constraints + * forced by the Dtd. The insertion point is defined using @prev and + * @next in the following ways: + * to insert before 'node': xmlValidGetValidElements(node->prev, node, ... + * to insert next 'node': xmlValidGetValidElements(node, node->next, ... + * to replace 'node': xmlValidGetValidElements(node->prev, node->next, ... + * to prepend a child to 'node': xmlValidGetValidElements(NULL, node->childs, + * to append a child to 'node': xmlValidGetValidElements(node->last, NULL, ... + * + * pointers to the element names are inserted at the beginning of the array + * and do not need to be freed. + * + * returns the number of element in the list, or -1 in case of error. 
If + * the function returns the value @max the caller is invited to grow the + * receiving array and retry. + */ + +int +xmlValidGetValidElements(xmlNode *prev, xmlNode *next, const xmlChar **list, + int max) { + xmlValidCtxt vctxt; + int nb_valid_elements = 0; + const xmlChar *elements[256]; + int nb_elements = 0, i; + const xmlChar *name; + + xmlNode *ref_node; + xmlNode *parent; + xmlNode *test_node; + + xmlNode *prev_next; + xmlNode *next_prev; + xmlNode *parent_childs; + xmlNode *parent_last; + + xmlElement *element_desc; + + memset(&vctxt, 0, sizeof (xmlValidCtxt)); + + if (prev == NULL && next == NULL) + return(-1); + + if (list == NULL) return(-1); + if (max <= 0) return(-1); + + nb_valid_elements = 0; + ref_node = prev ? prev : next; + parent = ref_node->parent; + + /* + * Retrieves the parent element declaration + */ + element_desc = xmlGetDtdElementDesc(parent->doc->intSubset, + parent->name); + if ((element_desc == NULL) && (parent->doc->extSubset != NULL)) + element_desc = xmlGetDtdElementDesc(parent->doc->extSubset, + parent->name); + if (element_desc == NULL) return(-1); + + /* + * Do a backup of the current tree structure + */ + prev_next = prev ? prev->next : NULL; + next_prev = next ? 
next->prev : NULL; + parent_childs = parent->children; + parent_last = parent->last; + + /* + * Creates a dummy node and insert it into the tree + */ + test_node = xmlNewNode (NULL, BAD_CAST "<!dummy?>"); + test_node->doc = ref_node->doc; + test_node->parent = parent; + test_node->prev = prev; + test_node->next = next; + name = test_node->name; + + if (prev) prev->next = test_node; + else parent->children = test_node; + + if (next) next->prev = test_node; + else parent->last = test_node; + + /* + * Insert each potential child node and check if the parent is + * still valid + */ + nb_elements = xmlValidGetPotentialChildren(element_desc->content, + elements, &nb_elements, 256); + + for (i = 0;i < nb_elements;i++) { + test_node->name = elements[i]; + if (xmlValidateOneElement(&vctxt, parent->doc, parent)) { + int j; + + for (j = 0; j < nb_valid_elements;j++) + if (xmlStrEqual(elements[i], list[j])) break; + list[nb_valid_elements++] = elements[i]; + if (nb_valid_elements >= max) break; + } + } + + /* + * Restore the tree structure + */ + if (prev) prev->next = prev_next; + if (next) next->prev = next_prev; + parent->children = parent_childs; + parent->last = parent_last; + + /* + * Free up the dummy node + */ + test_node->name = name; + xmlFreeNode(test_node); + + return(nb_valid_elements); +} diff --git a/bundle/libxml/xinclude.c b/bundle/libxml/xinclude.c new file mode 100644 index 0000000000..78ee0427b2 --- /dev/null +++ b/bundle/libxml/xinclude.c @@ -0,0 +1,1729 @@ +/* + * xinclude.c : Code to implement XInclude processing + * + * World Wide Web Consortium W3C Last Call Working Draft 16 May 2001 + * http://www.w3.org/TR/2001/WD-xinclude-20010516/ + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +/* + * TODO: compute XPointers nodesets + * TODO: add an node intermediate API and handle recursion at this level + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/uri.h> +#include <libxml/xpointer.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlerror.h> +#include <libxml/encoding.h> +#include <libxml/globals.h> + +#ifdef LIBXML_XINCLUDE_ENABLED +#include <libxml/xinclude.h> + +#define XINCLUDE_NS (const xmlChar *) "http://www.w3.org/2001/XInclude" +#define XINCLUDE_NODE (const xmlChar *) "include" +#define XINCLUDE_FALLBACK (const xmlChar *) "fallback" +#define XINCLUDE_HREF (const xmlChar *) "href" +#define XINCLUDE_PARSE (const xmlChar *) "parse" +#define XINCLUDE_PARSE_XML (const xmlChar *) "xml" +#define XINCLUDE_PARSE_TEXT (const xmlChar *) "text" +#define XINCLUDE_PARSE_ENCODING (const xmlChar *) "encoding" + +/* #define DEBUG_XINCLUDE */ +#ifdef DEBUG_XINCLUDE +#ifdef LIBXML_DEBUG_ENABLED +#include <libxml/debugXML.h> +#endif +#endif + +/************************************************************************ + * * + * XInclude contexts handling * + * * + ************************************************************************/ + +/* + * An XInclude context + */ +typedef xmlChar *xmlURL; + +typedef struct _xmlXIncludeRef xmlXIncludeRef; +typedef xmlXIncludeRef *xmlXIncludeRefPtr; +struct _xmlXIncludeRef { + xmlChar *URI; /* the rully resolved resource URL */ + xmlChar *fragment; /* the fragment in the URI */ + xmlDocPtr doc; /* the parsed document */ + xmlNodePtr ref; /* the node making the reference in the source */ + xmlNodePtr inc; /* the included copy */ + int xml; /* xml or txt */ + int count; /* how many refs use that specific doc */ +}; + +typedef struct _xmlXIncludeCtxt xmlXIncludeCtxt; +typedef xmlXIncludeCtxt *xmlXIncludeCtxtPtr; +struct _xmlXIncludeCtxt { + xmlDocPtr doc; /* 
the source document */ + int incBase; /* the first include for this document */ + int incNr; /* number of includes */ + int incMax; /* size of includes tab */ + xmlXIncludeRefPtr *incTab; /* array of included references */ + + int txtNr; /* number of unparsed documents */ + int txtMax; /* size of unparsed documents tab */ + xmlNodePtr *txtTab; /* array of unparsed text nodes */ + xmlURL *txturlTab; /* array of unparsed txtuments URLs */ +}; + +static int +xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc); + +/** + * xmlXIncludeFreeRef: + * @ref: the XInclude reference + * + * Free an XInclude reference + */ +static void +xmlXIncludeFreeRef(xmlXIncludeRefPtr ref) { + if (ref == NULL) + return; +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Freeing ref\n"); +#endif + if (ref->doc != NULL) { +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Freeing doc %s\n", ref->URI); +#endif + xmlFreeDoc(ref->doc); + } + if (ref->URI != NULL) + xmlFree(ref->URI); + if (ref->fragment != NULL) + xmlFree(ref->fragment); + xmlFree(ref); +} + +/** + * xmlXIncludeNewRef: + * @ctxt: the XInclude context + * @URI: the resource URI + * + * Creates a new reference within an XInclude context + * + * Returns the new set + */ +static xmlXIncludeRefPtr +xmlXIncludeNewRef(xmlXIncludeCtxtPtr ctxt, const xmlChar *URI, + xmlNodePtr ref) { + xmlXIncludeRefPtr ret; + +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "New ref %s\n", URI); +#endif + ret = (xmlXIncludeRefPtr) xmlMalloc(sizeof(xmlXIncludeRef)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlXIncludeRef)); + if (URI == NULL) + ret->URI = NULL; + else + ret->URI = xmlStrdup(URI); + ret->fragment = NULL; + ret->ref = ref; + ret->doc = 0; + ret->count = 0; + ret->xml = 0; + ret->inc = NULL; + if (ctxt->incMax == 0) { + ctxt->incMax = 4; + ctxt->incTab = (xmlXIncludeRefPtr *) xmlMalloc(ctxt->incMax * + sizeof(ctxt->incTab[0])); + if (ctxt->incTab == NULL) { + 
xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + xmlXIncludeFreeRef(ret); + return(NULL); + } + } + if (ctxt->incNr >= ctxt->incMax) { + ctxt->incMax *= 2; + ctxt->incTab = (xmlXIncludeRefPtr *) xmlRealloc(ctxt->incTab, + ctxt->incMax * sizeof(ctxt->incTab[0])); + if (ctxt->incTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + xmlXIncludeFreeRef(ret); + return(NULL); + } + } + ctxt->incTab[ctxt->incNr++] = ret; + return(ret); +} + +/** + * xmlXIncludeNewContext: + * @doc: an XML Document + * + * Creates a new XInclude context + * + * Returns the new set + */ +static xmlXIncludeCtxtPtr +xmlXIncludeNewContext(xmlDocPtr doc) { + xmlXIncludeCtxtPtr ret; + +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "New context\n"); +#endif + if (doc == NULL) + return(NULL); + ret = (xmlXIncludeCtxtPtr) xmlMalloc(sizeof(xmlXIncludeCtxt)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlXIncludeCtxt)); + ret->doc = doc; + ret->incNr = 0; + ret->incBase = 0; + ret->incMax = 0; + ret->incTab = NULL; + return(ret); +} + +/** + * xmlXIncludeFreeContext: + * @ctxt: the XInclude context + * + * Free an XInclude context + */ +static void +xmlXIncludeFreeContext(xmlXIncludeCtxtPtr ctxt) { + int i; + +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Freeing context\n"); +#endif + if (ctxt == NULL) + return; + for (i = 0;i < ctxt->incNr;i++) { + if (ctxt->incTab[i] != NULL) + xmlXIncludeFreeRef(ctxt->incTab[i]); + } + for (i = 0;i < ctxt->txtNr;i++) { + if (ctxt->txturlTab[i] != NULL) + xmlFree(ctxt->txturlTab[i]); + } + if (ctxt->incTab != NULL) + xmlFree(ctxt->incTab); + if (ctxt->txtTab != NULL) + xmlFree(ctxt->txtTab); + if (ctxt->txturlTab != NULL) + xmlFree(ctxt->txturlTab); + xmlFree(ctxt); +} + +/** + * xmlXIncludeAddNode: + * @ctxt: the XInclude context + * @cur: the new node + * + * Add a new node to process to an XInclude context + */ +static int +xmlXIncludeAddNode(xmlXIncludeCtxtPtr 
ctxt, xmlNodePtr cur) { + xmlXIncludeRefPtr ref; + xmlURIPtr uri; + xmlChar *URL; + xmlChar *fragment = NULL; + xmlChar *href; + xmlChar *parse; + xmlChar *base; + xmlChar *URI; + int xml = 1; /* default Issue 64 */ + + + if (ctxt == NULL) + return(-1); + if (cur == NULL) + return(-1); + +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Add node\n"); +#endif + /* + * read the attributes + */ + href = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_HREF); + if (href == NULL) { + href = xmlGetProp(cur, XINCLUDE_HREF); + if (href == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: no href\n"); + return(-1); + } + } + parse = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_PARSE); + if (parse == NULL) { + parse = xmlGetProp(cur, XINCLUDE_PARSE); + } + if (parse != NULL) { + if (xmlStrEqual(parse, XINCLUDE_PARSE_XML)) + xml = 1; + else if (xmlStrEqual(parse, XINCLUDE_PARSE_TEXT)) + xml = 0; + else { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value %s for %s\n", + parse, XINCLUDE_PARSE); + if (href != NULL) + xmlFree(href); + if (parse != NULL) + xmlFree(parse); + return(-1); + } + } + + /* + * compute the URI + */ + base = xmlNodeGetBase(ctxt->doc, cur); + if (base == NULL) { + URI = xmlBuildURI(href, ctxt->doc->URL); + } else { + URI = xmlBuildURI(href, base); + } + if (URI == NULL) { + xmlChar *escbase; + xmlChar *eschref; + /* + * Some escaping may be needed + */ + escbase = xmlURIEscape(base); + eschref = xmlURIEscape(href); + URI = xmlBuildURI(eschref, escbase); + if (escbase != NULL) + xmlFree(escbase); + if (eschref != NULL) + xmlFree(eschref); + } + if (parse != NULL) + xmlFree(parse); + if (href != NULL) + xmlFree(href); + if (base != NULL) + xmlFree(base); + if (URI == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: failed build URL\n"); + return(-1); + } + + /* + * Check the URL and remove any fragment identifier + */ + uri = xmlParseURI((const char *)URI); + if (uri == NULL) { + 
xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", URI); + return(-1); + } + if (uri->fragment != NULL) { + fragment = (xmlChar *) uri->fragment; + uri->fragment = NULL; + } + URL = xmlSaveUri(uri); + xmlFreeURI(uri); + xmlFree(URI); + if (URL == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", URI); + if (fragment != NULL) + xmlFree(fragment); + return(-1); + } + + ref = xmlXIncludeNewRef(ctxt, URL, cur); + if (ref == NULL) { + return(-1); + } + ref->fragment = fragment; + ref->doc = NULL; + ref->xml = xml; + ref->count = 1; + xmlFree(URL); + return(0); +} + +/** + * xmlXIncludeRecurseDoc: + * @ctxt: the XInclude context + * @doc: the new document + * @url: the associated URL + * + * The XInclude recursive nature is handled at this point. + */ +static void +xmlXIncludeRecurseDoc(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, + const xmlURL url ATTRIBUTE_UNUSED) { + xmlXIncludeCtxtPtr newctxt; + int i; + +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Recursing in doc %s\n", doc->URL); +#endif + /* + * Handle recursion here. 
+ */ + + newctxt = xmlXIncludeNewContext(doc); + if (newctxt != NULL) { + /* + * Copy the existing document set + */ + newctxt->incMax = ctxt->incMax; + newctxt->incNr = ctxt->incNr; + newctxt->incTab = (xmlXIncludeRefPtr *) xmlMalloc(newctxt->incMax * + sizeof(newctxt->incTab[0])); + if (newctxt->incTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + xmlFree(newctxt); + return; + } + + /* + * Inherit the documents already in use by others includes + */ + newctxt->incBase = ctxt->incNr; + for (i = 0;i < ctxt->incNr;i++) { + newctxt->incTab[i] = ctxt->incTab[i]; + newctxt->incTab[i]->count++; /* prevent the recursion from + freeing it */ + } + xmlXIncludeDoProcess(newctxt, doc); + for (i = 0;i < ctxt->incNr;i++) { + newctxt->incTab[i]->count--; + newctxt->incTab[i] = NULL; + } + xmlXIncludeFreeContext(newctxt); + } +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Done recursing in doc %s\n", url); +#endif +} + +/** + * xmlXIncludeAddTxt: + * @ctxt: the XInclude context + * @txt: the new text node + * @url: the associated URL + * + * Add a new txtument to the list + */ +static void +xmlXIncludeAddTxt(xmlXIncludeCtxtPtr ctxt, xmlNodePtr txt, const xmlURL url) { +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Adding text %s\n", url); +#endif + if (ctxt->txtMax == 0) { + ctxt->txtMax = 4; + ctxt->txtTab = (xmlNodePtr *) xmlMalloc(ctxt->txtMax * + sizeof(ctxt->txtTab[0])); + if (ctxt->txtTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + ctxt->txturlTab = (xmlURL *) xmlMalloc(ctxt->txtMax * + sizeof(ctxt->txturlTab[0])); + if (ctxt->txturlTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + } + if (ctxt->txtNr >= ctxt->txtMax) { + ctxt->txtMax *= 2; + ctxt->txtTab = (xmlNodePtr *) xmlRealloc(ctxt->txtTab, + ctxt->txtMax * sizeof(ctxt->txtTab[0])); + if (ctxt->txtTab == NULL) { + xmlGenericError(xmlGenericErrorContext, 
+ "realloc failed !\n"); + return; + } + ctxt->txturlTab = (xmlURL *) xmlRealloc(ctxt->txturlTab, + ctxt->txtMax * sizeof(ctxt->txturlTab[0])); + if (ctxt->txturlTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + } + ctxt->txtTab[ctxt->txtNr] = txt; + ctxt->txturlTab[ctxt->txtNr] = xmlStrdup(url); + ctxt->txtNr++; +} + +/************************************************************************ + * * + * Node copy with specific semantic * + * * + ************************************************************************/ + +/** + * xmlXIncludeCopyNode: + * @ctxt: the XInclude context + * @target: the document target + * @source: the document source + * @elem: the element + * + * Make a copy of the node while preserving the XInclude semantic + * of the Infoset copy + */ +static xmlNodePtr +xmlXIncludeCopyNode(xmlXIncludeCtxtPtr ctxt, xmlDocPtr target, + xmlDocPtr source, xmlNodePtr elem) { + xmlNodePtr result = NULL; + + if ((ctxt == NULL) || (target == NULL) || (source == NULL) || + (elem == NULL)) + return(NULL); + if (elem->type == XML_DTD_NODE) + return(NULL); + result = xmlDocCopyNode(elem, target, 1); + return(result); +} + +/** + * xmlXIncludeCopyNodeList: + * @ctxt: the XInclude context + * @target: the document target + * @source: the document source + * @elem: the element list + * + * Make a copy of the node list while preserving the XInclude semantic + * of the Infoset copy + */ +static xmlNodePtr +xmlXIncludeCopyNodeList(xmlXIncludeCtxtPtr ctxt, xmlDocPtr target, + xmlDocPtr source, xmlNodePtr elem) { + xmlNodePtr cur, res, result = NULL, last = NULL; + + if ((ctxt == NULL) || (target == NULL) || (source == NULL) || + (elem == NULL)) + return(NULL); + cur = elem; + while (cur != NULL) { + res = xmlXIncludeCopyNode(ctxt, target, source, cur); + if (res != NULL) { + if (result == NULL) { + result = last = res; + } else { + last->next = res; + res->prev = last; + last = res; + } + } + cur = cur->next; + } + 
return(result); +} + +/** + * xmlXInclueGetNthChild: + * @cur: the node + * @no: the child number + * + * Returns the @no'th element child of @cur or NULL + */ +static xmlNodePtr +xmlXIncludeGetNthChild(xmlNodePtr cur, int no) { + int i; + if (cur == NULL) + return(cur); + cur = cur->children; + for (i = 0;i <= no;cur = cur->next) { + if (cur == NULL) + return(cur); + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + i++; + if (i == no) + break; + } + } + return(cur); +} + +xmlNodePtr xmlXPtrAdvanceNode(xmlNodePtr cur); + +/** + * xmlXIncludeCopyRange: + * @ctxt: the XInclude context + * @target: the document target + * @source: the document source + * @obj: the XPointer result from the evaluation. + * + * Build a node list tree copy of the XPointer result. + * + * Returns an xmlNodePtr list or NULL. + * the caller has to free the node tree. + */ +static xmlNodePtr +xmlXIncludeCopyRange(xmlXIncludeCtxtPtr ctxt, xmlDocPtr target, + xmlDocPtr source, xmlXPathObjectPtr range) { + /* pointers to generated nodes */ + xmlNodePtr list = NULL, last = NULL, parent = NULL, tmp; + /* pointers to traversal nodes */ + xmlNodePtr start, cur, end; + int index1, index2; + + if ((ctxt == NULL) || (target == NULL) || (source == NULL) || + (range == NULL)) + return(NULL); + if (range->type != XPATH_RANGE) + return(NULL); + start = (xmlNodePtr) range->user; + + if (start == NULL) + return(NULL); + end = range->user2; + if (end == NULL) + return(xmlDocCopyNode(start, target, 1)); + + cur = start; + index1 = range->index; + index2 = range->index2; + while (cur != NULL) { + if (cur == end) { + if (cur->type == XML_TEXT_NODE) { + const xmlChar *content = cur->content; + int len; + + if (content == NULL) { + tmp = xmlNewTextLen(NULL, 0); + } else { + len = index2; + if ((cur == start) && (index1 > 1)) { + content += (index1 - 1); + len -= (index1 - 1); + index1 = 0; + } else { + len = index2; + } + tmp = 
xmlNewTextLen(content, len); + } + /* single sub text node selection */ + if (list == NULL) + return(tmp); + /* prune and return full set */ + if (last != NULL) + xmlAddNextSibling(last, tmp); + else + xmlAddChild(parent, tmp); + return(list); + } else { + tmp = xmlDocCopyNode(cur, target, 0); + if (list == NULL) + list = tmp; + else { + if (last != NULL) + xmlAddNextSibling(last, tmp); + else + xmlAddChild(parent, tmp); + } + last = NULL; + parent = tmp; + + if (index2 > 1) { + end = xmlXIncludeGetNthChild(cur, index2 - 1); + index2 = 0; + } + if ((cur == start) && (index1 > 1)) { + cur = xmlXIncludeGetNthChild(cur, index1 - 1); + index1 = 0; + } else { + cur = cur->children; + } + /* + * Now gather the remaining nodes from cur to end + */ + continue; /* while */ + } + } else if ((cur == start) && + (list == NULL) /* looks superfluous but ... */ ) { + if ((cur->type == XML_TEXT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE)) { + const xmlChar *content = cur->content; + + if (content == NULL) { + tmp = xmlNewTextLen(NULL, 0); + } else { + if (index1 > 1) { + content += (index1 - 1); + } + tmp = xmlNewText(content); + } + last = list = tmp; + } else { + if ((cur == start) && (index1 > 1)) { + tmp = xmlDocCopyNode(cur, target, 0); + list = tmp; + parent = tmp; + last = NULL; + cur = xmlXIncludeGetNthChild(cur, index1 - 1); + index1 = 0; + /* + * Now gather the remaining nodes from cur to end + */ + continue; /* while */ + } + tmp = xmlDocCopyNode(cur, target, 1); + list = tmp; + parent = NULL; + last = tmp; + } + } else { + tmp = NULL; + switch (cur->type) { + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_NODE: + /* Do not copy DTD informations */ + break; + case XML_ENTITY_DECL: + /* handle crossing entities -> stack needed */ + break; + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + /* don't consider it part of the tree content */ + break; + case XML_ATTRIBUTE_NODE: + /* Humm, should not happen ! 
*/ + break; + default: + tmp = xmlDocCopyNode(cur, target, 1); + break; + } + if (tmp != NULL) { + if ((list == NULL) || ((last == NULL) && (parent == NULL))) { + return(NULL); + } + if (last != NULL) + xmlAddNextSibling(last, tmp); + else { + xmlAddChild(parent, tmp); + last = tmp; + } + } + } + /* + * Skip to next node in document order + */ + if ((list == NULL) || ((last == NULL) && (parent == NULL))) { + return(NULL); + } + cur = xmlXPtrAdvanceNode(cur); + } + return(list); +} + +/** + * xmlXIncludeBuildNodeList: + * @ctxt: the XInclude context + * @target: the document target + * @source: the document source + * @obj: the XPointer result from the evaluation. + * + * Build a node list tree copy of the XPointer result. + * This will drop Attributes and Namespace declarations. + * + * Returns an xmlNodePtr list or NULL. + * the caller has to free the node tree. + */ +static xmlNodePtr +xmlXIncludeCopyXPointer(xmlXIncludeCtxtPtr ctxt, xmlDocPtr target, + xmlDocPtr source, xmlXPathObjectPtr obj) { + xmlNodePtr list = NULL, last = NULL; + int i; + + if ((ctxt == NULL) || (target == NULL) || (source == NULL) || + (obj == NULL)) + return(NULL); + switch (obj->type) { + case XPATH_NODESET: { + xmlNodeSetPtr set = obj->nodesetval; + if (set == NULL) + return(NULL); + for (i = 0;i < set->nodeNr;i++) { + if (set->nodeTab[i] == NULL) + continue; + switch (set->nodeTab[i]->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + case XML_ATTRIBUTE_NODE: + case XML_NAMESPACE_DECL: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + 
continue; /* for */ + } + if (last == NULL) + list = last = xmlXIncludeCopyNode(ctxt, target, source, + set->nodeTab[i]); + else { + xmlAddNextSibling(last, + xmlXIncludeCopyNode(ctxt, target, source, + set->nodeTab[i])); + if (last->next != NULL) + last = last->next; + } + } + break; + } + case XPATH_LOCATIONSET: { + xmlLocationSetPtr set = (xmlLocationSetPtr) obj->user; + if (set == NULL) + return(NULL); + for (i = 0;i < set->locNr;i++) { + if (last == NULL) + list = last = xmlXIncludeCopyXPointer(ctxt, target, source, + set->locTab[i]); + else + xmlAddNextSibling(last, + xmlXIncludeCopyXPointer(ctxt, target, source, + set->locTab[i])); + if (last != NULL) { + while (last->next != NULL) + last = last->next; + } + } + break; + } + case XPATH_RANGE: + return(xmlXIncludeCopyRange(ctxt, target, source, obj)); + case XPATH_POINT: + /* points are ignored in XInclude */ + break; + default: + break; + } + return(list); +} +/************************************************************************ + * * + * XInclude I/O handling * + * * + ************************************************************************/ + +/** + * xmlXIncludeLoadDoc: + * @ctxt: the XInclude context + * @url: the associated URL + * @nr: the xinclude node number + * + * Load the document, and store the result in the XInclude context + * + * Returns 0 in case of success, -1 in case of failure + */ +static int +xmlXIncludeLoadDoc(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) { + xmlDocPtr doc; + xmlURIPtr uri; + xmlChar *URL; + xmlChar *fragment = NULL; + int i = 0; + +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "Loading doc %s:%d\n", url, nr); +#endif + /* + * Check the URL and remove any fragment identifier + */ + uri = xmlParseURI((const char *)url); + if (uri == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + return(-1); + } + if (uri->fragment != NULL) { + fragment = (xmlChar *) uri->fragment; + uri->fragment = NULL; + } + 
URL = xmlSaveUri(uri); + xmlFreeURI(uri); + if (URL == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + if (fragment != NULL) + xmlFree(fragment); + return(-1); + } + + /* + * Handling of references to the local document are done + * directly through ctxt->doc. + */ + if ((URL[0] == 0) || (URL[0] == '#')) { + doc = NULL; + goto loaded; + } + + /* + * Prevent reloading twice the document. + */ + for (i = 0; i < ctxt->incNr; i++) { + if ((xmlStrEqual(URL, ctxt->incTab[i]->URI)) && + (ctxt->incTab[i]->doc != NULL)) { + doc = ctxt->incTab[i]->doc; +#ifdef DEBUG_XINCLUDE + printf("Already loaded %s\n", URL); +#endif + goto loaded; + } + } + + /* + * Load it. + */ +#ifdef DEBUG_XINCLUDE + printf("loading %s\n", URL); +#endif + doc = xmlParseFile((const char *)URL); + if (doc == NULL) { + xmlFree(URL); + if (fragment != NULL) + xmlFree(fragment); + return(-1); + } + ctxt->incTab[nr]->doc = doc; + + /* + * TODO: Make sure we have all entities fixed up + */ + + /* + * We don't need the DTD anymore, free up space + if (doc->intSubset != NULL) { + xmlUnlinkNode((xmlNodePtr) doc->intSubset); + xmlFreeNode((xmlNodePtr) doc->intSubset); + doc->intSubset = NULL; + } + if (doc->extSubset != NULL) { + xmlUnlinkNode((xmlNodePtr) doc->extSubset); + xmlFreeNode((xmlNodePtr) doc->extSubset); + doc->extSubset = NULL; + } + */ + xmlXIncludeRecurseDoc(ctxt, doc, URL); + +loaded: + if (fragment == NULL) { + /* + * Add the top children list as the replacement copy. + */ + if (doc == NULL) + { + /* Hopefully a DTD declaration won't be copied from + * the same document */ + ctxt->incTab[nr]->inc = xmlCopyNodeList(ctxt->doc->children); + } else { + ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc, + doc, doc->children); + } + } else { + /* + * Computes the XPointer expression and make a copy used + * as the replacement copy. 
+ */ + xmlXPathObjectPtr xptr; + xmlXPathContextPtr xptrctxt; + xmlNodeSetPtr set; + + if (doc == NULL) { + xptrctxt = xmlXPtrNewContext(ctxt->doc, ctxt->incTab[nr]->ref, + NULL); + } else { + xptrctxt = xmlXPtrNewContext(doc, NULL, NULL); + } + if (xptrctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: could create XPointer context\n"); + xmlFree(URL); + xmlFree(fragment); + return(-1); + } + xptr = xmlXPtrEval(fragment, xptrctxt); + if (xptr == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: XPointer evaluation failed: #%s\n", + fragment); + xmlXPathFreeContext(xptrctxt); + xmlFree(URL); + xmlFree(fragment); + return(-1); + } + switch (xptr->type) { + case XPATH_UNDEFINED: + case XPATH_BOOLEAN: + case XPATH_NUMBER: + case XPATH_STRING: + case XPATH_POINT: + case XPATH_USERS: + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "XInclude: XPointer is not a range: #%s\n", + fragment); + xmlXPathFreeContext(xptrctxt); + xmlFree(URL); + xmlFree(fragment); + return(-1); + case XPATH_NODESET: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + break; + } + set = xptr->nodesetval; + if (set != NULL) { + for (i = 0;i < set->nodeNr;i++) { + if (set->nodeTab[i] == NULL) + continue; + switch (set->nodeTab[i]->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + continue; + case XML_ATTRIBUTE_NODE: + xmlGenericError(xmlGenericErrorContext, + "XInclude: XPointer selects an attribute: #%s\n", + fragment); + set->nodeTab[i] = NULL; + continue; + case XML_NAMESPACE_DECL: + xmlGenericError(xmlGenericErrorContext, + "XInclude: XPointer selects a namespace: #%s\n", + fragment); + set->nodeTab[i] = NULL; + continue; + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case 
XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + xmlGenericError(xmlGenericErrorContext, + "XInclude: XPointer selects unexpected nodes: #%s\n", + fragment); + set->nodeTab[i] = NULL; + set->nodeTab[i] = NULL; + continue; /* for */ + } + } + } + ctxt->incTab[nr]->inc = + xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr); + xmlXPathFreeObject(xptr); + xmlXPathFreeContext(xptrctxt); + xmlFree(fragment); + } + + /* + * Do the xml:base fixup if needed + */ + if ((doc != NULL) && (URL != NULL) && (xmlStrchr(URL, (xmlChar) '/'))) { + xmlNodePtr node; + + node = ctxt->incTab[nr]->inc; + while (node != NULL) { + if (node->type == XML_ELEMENT_NODE) + xmlNodeSetBase(node, URL); + node = node->next; + } + } + if ((nr < ctxt->incNr) && (ctxt->incTab[nr]->doc != NULL) && + (ctxt->incTab[nr]->count <= 1)) { +#ifdef DEBUG_XINCLUDE + printf("freeing %s\n", ctxt->incTab[nr]->doc->URL); +#endif + xmlFreeDoc(ctxt->incTab[nr]->doc); + ctxt->incTab[nr]->doc = NULL; + } + xmlFree(URL); + return(0); +} + +/** + * xmlXIncludeLoadTxt: + * @ctxt: the XInclude context + * @url: the associated URL + * @nr: the xinclude node number + * + * Load the content, and store the result in the XInclude context + * + * Returns 0 in case of success, -1 in case of failure + */ +static int +xmlXIncludeLoadTxt(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) { + xmlParserInputBufferPtr buf; + xmlNodePtr node; + xmlURIPtr uri; + xmlChar *URL; + int i; + xmlChar *encoding = NULL; + xmlCharEncoding enc = 0; + + /* + * Check the URL and remove any fragment identifier + */ + uri = xmlParseURI((const char *)url); + if (uri == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + return(-1); + } + if (uri->fragment != NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: fragment identifier forbidden for text: %s\n", + uri->fragment); + 
xmlFreeURI(uri); + return(-1); + } + URL = xmlSaveUri(uri); + xmlFreeURI(uri); + if (URL == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + return(-1); + } + + /* + * Handling of references to the local document are done + * directly through ctxt->doc. + */ + if (URL[0] == 0) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: text serialization of document not available\n"); + xmlFree(URL); + return(-1); + } + + /* + * Prevent reloading twice the document. + */ + for (i = 0; i < ctxt->txtNr; i++) { + if (xmlStrEqual(URL, ctxt->txturlTab[i])) { + node = xmlCopyNode(ctxt->txtTab[i], 1); + goto loaded; + } + } + /* + * Try to get the encoding if available + */ + if ((ctxt->incTab[nr] != NULL) && (ctxt->incTab[nr]->ref != NULL)) { + encoding = xmlGetProp(ctxt->incTab[nr]->ref, XINCLUDE_PARSE_ENCODING); + } + if (encoding != NULL) { + /* + * TODO: we should not have to remap to the xmlCharEncoding + * predefined set, a better interface than + * xmlParserInputBufferCreateFilename should allow any + * encoding supported by iconv + */ + enc = xmlParseCharEncoding((const char *) encoding); + if (enc == XML_CHAR_ENCODING_ERROR) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: encoding %s not supported\n", encoding); + xmlFree(encoding); + xmlFree(URL); + return(-1); + } + xmlFree(encoding); + } + + /* + * Load it. 
+ */ + buf = xmlParserInputBufferCreateFilename((const char *)URL, enc); + if (buf == NULL) { + xmlFree(URL); + return(-1); + } + node = xmlNewText(NULL); + + /* + * Scan all chars from the resource and add the to the node + */ + while (xmlParserInputBufferRead(buf, 128) > 0) { + int len; + const xmlChar *content; + + content = xmlBufferContent(buf->buffer); + len = xmlBufferLength(buf->buffer); + for (i = 0;i < len;) { + int cur; + int l; + + cur = xmlStringCurrentChar(NULL, &content[i], &l); + if (!IS_CHAR(cur)) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: %s contains invalid char %d\n", URL, cur); + } else { + xmlNodeAddContentLen(node, &content[i], l); + } + i += l; + } + xmlBufferShrink(buf->buffer, len); + } + xmlFreeParserInputBuffer(buf); + xmlXIncludeAddTxt(ctxt, node, URL); + +loaded: + /* + * Add the element as the replacement copy. + */ + ctxt->incTab[nr]->inc = node; + xmlFree(URL); + return(0); +} + +/** + * xmlXIncludeLoadFallback: + * @ctxt: the XInclude context + * @fallback: the fallback node + * @nr: the xinclude node number + * + * Load the content of the fallback node, and store the result + * in the XInclude context + * + * Returns 0 in case of success, -1 in case of failure + */ +static int +xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { + if ((fallback == NULL) || (ctxt == NULL)) + return(-1); + + ctxt->incTab[nr]->inc = xmlCopyNode(fallback->children, 1); + return(0); +} + +/************************************************************************ + * * + * XInclude Processing * + * * + ************************************************************************/ + +/** + * xmlXIncludePreProcessNode: + * @ctxt: an XInclude context + * @node: an XInclude node + * + * Implement the XInclude preprocessing, currently just adding the element + * for further processing. 
+ * + * Returns the result list or NULL in case of error + */ +static xmlNodePtr +xmlXIncludePreProcessNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) { + xmlXIncludeAddNode(ctxt, node); + return(0); +} + +#if 0 +/** + * xmlXIncludePreloadNode: + * @ctxt: an XInclude context + * @nr: the node number + * + * Do some precomputations and preload shared documents + * + * Returns 0 if substitution succeeded, -1 if some processing failed + */ +static int +xmlXIncludePreloadNode(xmlXIncludeCtxtPtr ctxt, int nr) { + xmlNodePtr cur; + xmlChar *href; + xmlChar *parse; + xmlChar *base; + xmlChar *URI; + int xml = 1; /* default Issue 64 */ + xmlURIPtr uri; + xmlChar *URL; + xmlChar *fragment = NULL; + int i; + + + if (ctxt == NULL) + return(-1); + if ((nr < 0) || (nr >= ctxt->incNr)) + return(-1); + cur = ctxt->incTab[nr]->ref; + if (cur == NULL) + return(-1); + + /* + * read the attributes + */ + href = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_HREF); + if (href == NULL) { + href = xmlGetProp(cur, XINCLUDE_HREF); + if (href == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: no href\n"); + return(-1); + } + } + parse = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_PARSE); + if (parse == NULL) { + parse = xmlGetProp(cur, XINCLUDE_PARSE); + } + if (parse != NULL) { + if (xmlStrEqual(parse, XINCLUDE_PARSE_XML)) + xml = 1; + else if (xmlStrEqual(parse, XINCLUDE_PARSE_TEXT)) + xml = 0; + else { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value %s for %s\n", + parse, XINCLUDE_PARSE); + if (href != NULL) + xmlFree(href); + if (parse != NULL) + xmlFree(parse); + return(-1); + } + } + + /* + * compute the URI + */ + base = xmlNodeGetBase(ctxt->doc, cur); + if (base == NULL) { + URI = xmlBuildURI(href, ctxt->doc->URL); + } else { + URI = xmlBuildURI(href, base); + } + if (URI == NULL) { + xmlChar *escbase; + xmlChar *eschref; + /* + * Some escaping may be needed + */ + escbase = xmlURIEscape(base); + eschref = xmlURIEscape(href); + URI = xmlBuildURI(eschref, 
escbase); + if (escbase != NULL) + xmlFree(escbase); + if (eschref != NULL) + xmlFree(eschref); + } + if (parse != NULL) + xmlFree(parse); + if (href != NULL) + xmlFree(href); + if (base != NULL) + xmlFree(base); + if (URI == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: failed build URL\n"); + return(-1); + } + + /* + * Check the URL and remove any fragment identifier + */ + uri = xmlParseURI((const char *)URI); + if (uri == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", URI); + xmlFree(URI); + return(-1); + } + if (uri->fragment != NULL) { + fragment = (xmlChar *) uri->fragment; + uri->fragment = NULL; + } + URL = xmlSaveUri(uri); + xmlFreeURI(uri); + if (URL == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", URI); + if (fragment != NULL) + xmlFree(fragment); + xmlFree(URI); + return(-1); + } + xmlFree(URI); + if (fragment != NULL) + xmlFree(fragment); + + for (i = 0; i < nr; i++) { + if (xmlStrEqual(URL, ctxt->incTab[i]->URI)) { +#ifdef DEBUG_XINCLUDE + printf("Incrementing count for %d : %s\n", i, ctxt->incTab[i]->URI); +#endif + ctxt->incTab[i]->count++; + break; + } + } + xmlFree(URL); + return(0); +} +#endif + +/** + * xmlXIncludeLoadNode: + * @ctxt: an XInclude context + * @nr: the node number + * + * Find and load the infoset replacement for the given node. 
+ * + * Returns 0 if substitution succeeded, -1 if some processing failed + */ +static int +xmlXIncludeLoadNode(xmlXIncludeCtxtPtr ctxt, int nr) { + xmlNodePtr cur; + xmlChar *href; + xmlChar *parse; + xmlChar *base; + xmlChar *URI; + int xml = 1; /* default Issue 64 */ + int ret; + + if (ctxt == NULL) + return(-1); + if ((nr < 0) || (nr >= ctxt->incNr)) + return(-1); + cur = ctxt->incTab[nr]->ref; + if (cur == NULL) + return(-1); + + /* + * read the attributes + */ + href = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_HREF); + if (href == NULL) { + href = xmlGetProp(cur, XINCLUDE_HREF); + if (href == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: no href\n"); + return(-1); + } + } + parse = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_PARSE); + if (parse == NULL) { + parse = xmlGetProp(cur, XINCLUDE_PARSE); + } + if (parse != NULL) { + if (xmlStrEqual(parse, XINCLUDE_PARSE_XML)) + xml = 1; + else if (xmlStrEqual(parse, XINCLUDE_PARSE_TEXT)) + xml = 0; + else { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value %s for %s\n", + parse, XINCLUDE_PARSE); + if (href != NULL) + xmlFree(href); + if (parse != NULL) + xmlFree(parse); + return(-1); + } + } + + /* + * compute the URI + */ + base = xmlNodeGetBase(ctxt->doc, cur); + if (base == NULL) { + URI = xmlBuildURI(href, ctxt->doc->URL); + } else { + URI = xmlBuildURI(href, base); + } + if (URI == NULL) { + xmlChar *escbase; + xmlChar *eschref; + /* + * Some escaping may be needed + */ + escbase = xmlURIEscape(base); + eschref = xmlURIEscape(href); + URI = xmlBuildURI(eschref, escbase); + if (escbase != NULL) + xmlFree(escbase); + if (eschref != NULL) + xmlFree(eschref); + } + if (URI == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: failed build URL\n"); + if (parse != NULL) + xmlFree(parse); + if (href != NULL) + xmlFree(href); + if (base != NULL) + xmlFree(base); + return(-1); + } +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "parse: %s\n", + xml ? 
"xml": "text"); + xmlGenericError(xmlGenericErrorContext, "URI: %s\n", URI); +#endif + + /* + * Cleanup + */ + if (xml) { + ret = xmlXIncludeLoadDoc(ctxt, URI, nr); + /* xmlXIncludeGetFragment(ctxt, cur, URI); */ + } else { + ret = xmlXIncludeLoadTxt(ctxt, URI, nr); + } + if (ret < 0) { + xmlNodePtr children; + + /* + * Time to try a fallback if availble + */ +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "error looking for fallback\n"); +#endif + children = cur->children; + while (children != NULL) { + if ((children->type == XML_ELEMENT_NODE) && + (children->ns != NULL) && + (xmlStrEqual(children->name, XINCLUDE_FALLBACK)) && + (xmlStrEqual(children->ns->href, XINCLUDE_NS))) { + ret = xmlXIncludeLoadFallback(ctxt, children, nr); + if (ret == 0) + break; + } + children = children->next; + } + } + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: could not load %s, and no fallback was found\n", + URI); + } + + /* + * Cleanup + */ + if (URI != NULL) + xmlFree(URI); + if (parse != NULL) + xmlFree(parse); + if (href != NULL) + xmlFree(href); + if (base != NULL) + xmlFree(base); + return(0); +} + +/** + * xmlXIncludeIncludeNode: + * @ctxt: an XInclude context + * @nr: the node number + * + * Inplement the infoset replacement for the given node + * + * Returns 0 if substitution succeeded, -1 if some processing failed + */ +static int +xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + xmlNodePtr cur, end, list; + + if (ctxt == NULL) + return(-1); + if ((nr < 0) || (nr >= ctxt->incNr)) + return(-1); + cur = ctxt->incTab[nr]->ref; + if (cur == NULL) + return(-1); + + /* + * Change the current node as an XInclude start one, and add an + * entity end one + */ + cur->type = XML_XINCLUDE_START; + end = xmlNewNode(cur->ns, cur->name); + if (end == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: failed to build node\n"); + return(-1); + } + end->type = XML_XINCLUDE_END; + xmlAddNextSibling(cur, end); + + /* + * Add 
the list of nodes + */ + list = ctxt->incTab[nr]->inc; + ctxt->incTab[nr]->inc = NULL; + while (list != NULL) { + cur = list; + list = list->next; + + xmlAddPrevSibling(end, cur); + } + + + return(0); +} + +/** + * xmlXIncludeTestNode: + * @node: an XInclude node + * + * test if the node is an XInclude node + * + * Returns 1 true, 0 otherwise + */ +static int +xmlXIncludeTestNode(xmlNodePtr node) { + if (node == NULL) + return(0); + if (node->ns == NULL) + return(0); + if ((xmlStrEqual(node->name, XINCLUDE_NODE)) && + (xmlStrEqual(node->ns->href, XINCLUDE_NS))) return(1); + return(0); +} + +/** + * xmlXIncludeDoProcess: + * @ctxt: + * @doc: an XML document + * + * Implement the XInclude substitution on the XML document @doc + * + * Returns 0 if no substitution were done, -1 if some processing failed + * or the number of substitutions done. + */ +static int +xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc) { + xmlNodePtr cur; + int ret = 0; + int i; + + if (doc == NULL) + return(-1); + if (ctxt == NULL) + return(-1); + + /* + * First phase: lookup the elements in the document + */ + cur = xmlDocGetRootElement(doc); + if (xmlXIncludeTestNode(cur)) + xmlXIncludePreProcessNode(ctxt, cur); + while (cur != NULL) { + /* TODO: need to work on entities -> stack */ + if ((cur->children != NULL) && + (cur->children->type != XML_ENTITY_DECL)) { + cur = cur->children; + if (xmlXIncludeTestNode(cur)) + xmlXIncludePreProcessNode(ctxt, cur); + } else if (cur->next != NULL) { + cur = cur->next; + if (xmlXIncludeTestNode(cur)) + xmlXIncludePreProcessNode(ctxt, cur); + } else { + do { + cur = cur->parent; + if (cur == NULL) break; /* do */ + if (cur->next != NULL) { + cur = cur->next; + if (xmlXIncludeTestNode(cur)) + xmlXIncludePreProcessNode(ctxt, cur); + break; /* do */ + } + } while (cur != NULL); + } + } + + /* + * Second Phase : collect the infosets fragments + */ + /* + for (i = ctxt->incBase;i < ctxt->incNr; i++) { + xmlXIncludePreloadNode(ctxt, i); + } + */ + for 
(i = ctxt->incBase;i < ctxt->incNr; i++) { + xmlXIncludeLoadNode(ctxt, i); + } + + /* + * Third phase: extend the original document infoset. + */ + for (i = ctxt->incBase;i < ctxt->incNr; i++) { + xmlXIncludeIncludeNode(ctxt, i); + } + + return(ret); +} + +/** + * xmlXIncludeProcess: + * @doc: an XML document + * + * Implement the XInclude substitution on the XML document @doc + * + * Returns 0 if no substitution were done, -1 if some processing failed + * or the number of substitutions done. + */ +int +xmlXIncludeProcess(xmlDocPtr doc) { + xmlXIncludeCtxtPtr ctxt; + int ret = 0; + + if (doc == NULL) + return(-1); + ctxt = xmlXIncludeNewContext(doc); + if (ctxt == NULL) + return(-1); + ret = xmlXIncludeDoProcess(ctxt, doc); + + xmlXIncludeFreeContext(ctxt); + return(ret); +} + +#else /* !LIBXML_XINCLUDE_ENABLED */ +#endif diff --git a/bundle/libxml/xlink.c b/bundle/libxml/xlink.c new file mode 100644 index 0000000000..17ec77f66f --- /dev/null +++ b/bundle/libxml/xlink.c @@ -0,0 +1,179 @@ +/* + * xlink.c : implementation of the hyperlinks detection module + * This version supports both XML XLinks and HTML simple links + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> /* for memset() only */ +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/valid.h> +#include <libxml/xlink.h> +#include <libxml/globals.h> + +#define XLINK_NAMESPACE (BAD_CAST "http://www.w3.org/1999/xlink/namespace/") +#define XHTML_NAMESPACE (BAD_CAST "http://www.w3.org/1999/xhtml/") + +/**************************************************************** + * * + * Default setting and related functions * + * * + ****************************************************************/ + +static xlinkHandlerPtr xlinkDefaultHandler = NULL; +static xlinkNodeDetectFunc xlinkDefaultDetect = NULL; + +/** + * xlinkGetDefaultHandler: + * + * Get the default xlink handler. + * + * Returns the current xlinkHandlerPtr value. + */ +xlinkHandlerPtr +xlinkGetDefaultHandler(void) { + return(xlinkDefaultHandler); +} + + +/** + * xlinkSetDefaultHandler: + * @handler: the new value for the xlink handler block + * + * Set the default xlink handlers + */ +void +xlinkSetDefaultHandler(xlinkHandlerPtr handler) { + xlinkDefaultHandler = handler; +} + +/** + * xlinkGetDefaultDetect: + * + * Get the default xlink detection routine + * + * Returns the current function or NULL; + */ +xlinkNodeDetectFunc +xlinkGetDefaultDetect (void) { + return(xlinkDefaultDetect); +} + +/** + * xlinkSetDefaultDetect: + * @func: pointer to the new detection routine. 
+ * + * Set the default xlink detection routine + */ +void +xlinkSetDefaultDetect (xlinkNodeDetectFunc func) { + xlinkDefaultDetect = func; +} + +/**************************************************************** + * * + * The detection routines * + * * + ****************************************************************/ + + +/** + * xlinkIsLink: + * @doc: the document containing the node + * @node: the node pointer itself + * + * Check whether the given node carries the attributes needed + * to be a link element (or is one of the linking elements issued + * from the (X)HTML DtDs). + * This routine don't try to do full checking of the link validity + * but tries to detect and return the appropriate link type. + * + * Returns the xlinkType of the node (XLINK_TYPE_NONE if there is no + * link detected. + */ +xlinkType +xlinkIsLink (xmlDocPtr doc, xmlNodePtr node) { + xmlChar *type = NULL, *role = NULL; + xlinkType ret = XLINK_TYPE_NONE; + + if (node == NULL) return(XLINK_TYPE_NONE); + if (doc == NULL) doc = node->doc; + if ((doc != NULL) && (doc->type == XML_HTML_DOCUMENT_NODE)) { + /* + * This is an HTML document. + */ + } else if ((node->ns != NULL) && + (xmlStrEqual(node->ns->href, XHTML_NAMESPACE))) { + /* + * !!!! We really need an IS_XHTML_ELEMENT function from HTMLtree.h @@@ + */ + /* + * This is an XHTML element within an XML document + * Check whether it's one of the element able to carry links + * and in that case if it holds the attributes. 
+ */ + } + + /* + * We don't prevent a-priori having XML Linking constructs on + * XHTML elements + */ + type = xmlGetNsProp(node, BAD_CAST"type", XLINK_NAMESPACE); + if (type != NULL) { + if (!xmlStrEqual(type, BAD_CAST "simple")) { + ret = XLINK_TYPE_SIMPLE; + } if (!xmlStrEqual(type, BAD_CAST "extended")) { + role = xmlGetNsProp(node, BAD_CAST "role", XLINK_NAMESPACE); + if (role != NULL) { + xmlNsPtr xlink; + xlink = xmlSearchNs(doc, node, XLINK_NAMESPACE); + if (xlink == NULL) { + /* Humm, fallback method */ + if (xmlStrEqual(role, BAD_CAST"xlink:external-linkset")) + ret = XLINK_TYPE_EXTENDED_SET; + } else { + xmlChar buf[200]; + snprintf((char *) buf, sizeof(buf), "%s:external-linkset", + (char *) xlink->prefix); + buf[sizeof(buf) - 1] = 0; + if (xmlStrEqual(role, buf)) + ret = XLINK_TYPE_EXTENDED_SET; + + } + + } + ret = XLINK_TYPE_EXTENDED; + } + } + + if (type != NULL) xmlFree(type); + if (role != NULL) xmlFree(role); + return(ret); +} diff --git a/bundle/libxml/xmlIO.c b/bundle/libxml/xmlIO.c new file mode 100644 index 0000000000..8bd9f9ca18 --- /dev/null +++ b/bundle/libxml/xmlIO.c @@ -0,0 +1,2839 @@ +/* + * xmlIO.c : implementation of the I/O interfaces used by the parser + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + * + * 14 Nov 2000 ht - for VMS, truncated name of long functions to under 32 char + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif + + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_ZLIB_H +#include <zlib.h> +#endif + +/* Figure a portable way to know if a file is a directory. */ +#ifndef HAVE_STAT +# ifdef HAVE__STAT + /* MS C library seems to define stat and _stat. The definition + is identical. 
Still, mapping them to each other causes a warning. */ +# ifndef _MSC_VER +# define stat(x,y) _stat(x,y) +# endif +# define HAVE_STAT +# endif +#endif +#ifdef HAVE_STAT +# ifndef S_ISDIR +# ifdef _S_ISDIR +# define S_ISDIR(x) _S_ISDIR(x) +# else +# ifdef S_IFDIR +# ifndef S_IFMT +# ifdef _S_IFMT +# define S_IFMT _S_IFMT +# endif +# endif +# ifdef S_IFMT +# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# endif +# endif +# endif +# endif +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlIO.h> +#include <libxml/uri.h> +#include <libxml/nanohttp.h> +#include <libxml/nanoftp.h> +#include <libxml/xmlerror.h> +#ifdef LIBXML_CATALOG_ENABLED +#include <libxml/catalog.h> +#endif +#include <libxml/globals.h> + +/* #define VERBOSE_FAILURE */ +/* #define DEBUG_EXTERNAL_ENTITIES */ +/* #define DEBUG_INPUT */ + +#ifdef DEBUG_INPUT +#define MINLEN 40 +#else +#define MINLEN 4000 +#endif + +/* + * Input I/O callback sets + */ +typedef struct _xmlInputCallback { + xmlInputMatchCallback matchcallback; + xmlInputOpenCallback opencallback; + xmlInputReadCallback readcallback; + xmlInputCloseCallback closecallback; +} xmlInputCallback; + +#define MAX_INPUT_CALLBACK 15 + +static xmlInputCallback xmlInputCallbackTable[MAX_INPUT_CALLBACK]; +static int xmlInputCallbackNr = 0; +static int xmlInputCallbackInitialized = 0; + +/* + * Output I/O callback sets + */ +typedef struct _xmlOutputCallback { + xmlOutputMatchCallback matchcallback; + xmlOutputOpenCallback opencallback; + xmlOutputWriteCallback writecallback; + xmlOutputCloseCallback closecallback; +} xmlOutputCallback; + +#define MAX_OUTPUT_CALLBACK 15 + +static xmlOutputCallback xmlOutputCallbackTable[MAX_OUTPUT_CALLBACK]; +static int xmlOutputCallbackNr = 0; +static int xmlOutputCallbackInitialized = 0; + +/************************************************************************ + * * + * Handling of Windows file paths * + * * + 
************************************************************************/ + +#define IS_WINDOWS_PATH(p) \ + ((p != NULL) && \ + (((p[0] >= 'a') && (p[0] <= 'z')) || \ + ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ + (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) + + +/** + * xmlNormalizeWindowsPath: + * @path: a windows path like "C:/foo/bar" + * + * Normalize a Windows path to make an URL from it + * + * Returns a new URI which must be freed by the caller or NULL + * in case of error + */ +xmlChar * +xmlNormalizeWindowsPath(const xmlChar *path) +{ + int len, i = 0, j; + xmlChar *ret; + + if (path == NULL) + return(NULL); + + len = xmlStrlen(path); + if (!IS_WINDOWS_PATH(path)) { + ret = xmlStrdup(path); + if (ret == NULL) + return(NULL); + j = 0; + } else { + ret = xmlMalloc(len + 10); + if (ret == NULL) + return(NULL); + ret[0] = 'f'; + ret[1] = 'i'; + ret[2] = 'l'; + ret[3] = 'e'; + ret[4] = ':'; + ret[5] = '/'; + ret[6] = '/'; + ret[7] = '/'; + j = 8; + } + + while (i < len) { + /* TODO: UTF8 conversion + URI escaping ??? */ + if (path[i] == '\\') + ret[j] = '/'; + else + ret[j] = path[i]; + i++; + j++; + } + ret[j] = 0; + + return(ret); +} + +/** + * xmlCleanupInputCallbacks: + * + * clears the entire input callback table. this includes the + * compiled-in I/O. + */ +void +xmlCleanupInputCallbacks(void) +{ + int i; + + if (!xmlInputCallbackInitialized) + return; + + for (i = xmlInputCallbackNr - 1; i >= 0; i--) { + xmlInputCallbackTable[i].matchcallback = NULL; + xmlInputCallbackTable[i].opencallback = NULL; + xmlInputCallbackTable[i].readcallback = NULL; + xmlInputCallbackTable[i].closecallback = NULL; + } + xmlInputCallbackInitialized = 0; + + xmlInputCallbackNr = 0; + xmlInputCallbackInitialized = 0; +} + +/** + * xmlCleanupOutputCallbacks: + * + * clears the entire output callback table. this includes the + * compiled-in I/O callbacks. 
+ */ +void +xmlCleanupOutputCallbacks(void) +{ + int i; + + if (!xmlOutputCallbackInitialized) + return; + + for (i = xmlOutputCallbackNr - 1; i >= 0; i--) { + xmlOutputCallbackTable[i].matchcallback = NULL; + xmlOutputCallbackTable[i].opencallback = NULL; + xmlOutputCallbackTable[i].writecallback = NULL; + xmlOutputCallbackTable[i].closecallback = NULL; + } + xmlOutputCallbackInitialized = 0; + + xmlOutputCallbackNr = 0; + xmlOutputCallbackInitialized = 0; +} + +/************************************************************************ + * * + * Standard I/O for file accesses * + * * + ************************************************************************/ + +/** + * xmlCheckFilename: + * @path: the path to check + * + * function checks to see if @path is a valid source + * (file, socket...) for XML. + * + * if stat is not available on the target machine, + * returns 1. if stat fails, returns 0 (if calling + * stat on the filename fails, it can't be right). + * if stat succeeds and the file is a directory, + * returns 2. otherwise returns 1. + */ + +int +xmlCheckFilename (const char *path) +{ +#ifdef HAVE_STAT + struct stat stat_buffer; + + if (stat(path, &stat_buffer) == -1) + return 0; + +#ifdef S_ISDIR + if (S_ISDIR(stat_buffer.st_mode)) { + return 2; + } +#endif +#endif + return 1; +} + +static int +xmlNop(void) { + return(0); +} + +/** + * xmlFdRead: + * @context: the I/O context + * @buffer: where to drop data + * @len: number of bytes to read + * + * Read @len bytes to @buffer from the I/O channel. + * + * Returns the number of bytes written + */ +static int +xmlFdRead (void * context, char * buffer, int len) { + return(read((int) (long) context, &buffer[0], len)); +} + +/** + * xmlFdWrite: + * @context: the I/O context + * @buffer: where to get data + * @len: number of bytes to write + * + * Write @len bytes from @buffer to the I/O channel. 
+ * + * Returns the number of bytes written + */ +static int +xmlFdWrite (void * context, const char * buffer, int len) { + return(write((int) (long) context, &buffer[0], len)); +} + +/** + * xmlFdClose: + * @context: the I/O context + * + * Close an I/O channel + * + * Returns 0 in case of success and error code otherwise + */ +static int +xmlFdClose (void * context) { + return ( close((int) (long) context) ); +} + +/** + * xmlFileMatch: + * @filename: the URI for matching + * + * input from FILE * + * + * Returns 1 if matches, 0 otherwise + */ +int +xmlFileMatch (const char *filename ATTRIBUTE_UNUSED) { + return(1); +} + +/** + * xmlFileOpen: + * @filename: the URI for matching + * + * input from FILE *, supports compressed input + * if @filename is " " then the standard input is used + * + * Returns an I/O context or NULL in case of error + */ +void * +xmlFileOpen (const char *filename) { + const char *path = NULL; + FILE *fd; + + if (!strcmp(filename, "-")) { + fd = stdin; + return((void *) fd); + } + + if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17)) +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[17]; +#else + path = &filename[16]; +#endif + else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) { +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[8]; +#else + path = &filename[7]; +#endif + } else + path = filename; + + if (path == NULL) + return(NULL); + if (!xmlCheckFilename(path)) + return(NULL); + +#if defined(WIN32) || defined (__CYGWIN__) + fd = fopen(path, "rb"); +#else + fd = fopen(path, "r"); +#endif /* WIN32 */ + return((void *) fd); +} + +/** + * xmlFileOpenW: + * @filename: the URI for matching + * + * output to from FILE *, + * if @filename is "-" then the standard output is used + * + * Returns an I/O context or NULL in case of error + */ +static void * +xmlFileOpenW (const char *filename) { + const char *path = NULL; + FILE *fd; + + if (!strcmp(filename, "-")) { + fd = 
stdout; + return((void *) fd); + } + + if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17)) +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[17]; +#else + path = &filename[16]; +#endif + else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) { +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[8]; +#else + path = &filename[7]; +#endif + } else + path = filename; + + if (path == NULL) + return(NULL); + + fd = fopen(path, "w"); + return((void *) fd); +} + +/** + * xmlFileRead: + * @context: the I/O context + * @buffer: where to drop data + * @len: number of bytes to write + * + * Read @len bytes to @buffer from the I/O channel. + * + * Returns the number of bytes written + */ +int +xmlFileRead (void * context, char * buffer, int len) { + return(fread(&buffer[0], 1, len, (FILE *) context)); +} + +/** + * xmlFileWrite: + * @context: the I/O context + * @buffer: where to drop data + * @len: number of bytes to write + * + * Write @len bytes from @buffer to the I/O channel. + * + * Returns the number of bytes written + */ +static int +xmlFileWrite (void * context, const char * buffer, int len) { + return(fwrite(&buffer[0], 1, len, (FILE *) context)); +} + +/** + * xmlFileClose: + * @context: the I/O context + * + * Close an I/O channel + * + * Returns 0 or -1 in case of error + */ +int +xmlFileClose (void * context) { + FILE *fil; + + fil = (FILE *) context; + if (fil == stdin) + return(0); + if (fil == stdout) + return(0); + if (fil == stderr) + return(0); + return ( ( fclose((FILE *) context) == EOF ) ? -1 : 0 ); +} + +/** + * xmlFileFlush: + * @context: the I/O context + * + * Flush an I/O channel + */ +static int +xmlFileFlush (void * context) { + return ( ( fflush((FILE *) context) == EOF ) ? 
-1 : 0 ); +} + +#ifdef HAVE_ZLIB_H +/************************************************************************ + * * + * I/O for compressed file accesses * + * * + ************************************************************************/ +/** + * xmlGzfileMatch: + * @filename: the URI for matching + * + * input from compressed file test + * + * Returns 1 if matches, 0 otherwise + */ +static int +xmlGzfileMatch (const char *filename ATTRIBUTE_UNUSED) { + return(1); +} + +/** + * xmlGzfileOpen: + * @filename: the URI for matching + * + * input from compressed file open + * if @filename is " " then the standard input is used + * + * Returns an I/O context or NULL in case of error + */ +static void * +xmlGzfileOpen (const char *filename) { + const char *path = NULL; + gzFile fd; + + if (!strcmp(filename, "-")) { + fd = gzdopen(dup(0), "rb"); + return((void *) fd); + } + + if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17)) +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[17]; +#else + path = &filename[16]; +#endif + else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) { +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[8]; +#else + path = &filename[7]; +#endif + } else + path = filename; + + if (path == NULL) + return(NULL); + if (!xmlCheckFilename(path)) + return(NULL); + + fd = gzopen(path, "rb"); + return((void *) fd); +} + +/** + * xmlGzfileOpenW: + * @filename: the URI for matching + * @compression: the compression factor (0 - 9 included) + * + * input from compressed file open + * if @filename is " " then the standard input is used + * + * Returns an I/O context or NULL in case of error + */ +static void * +xmlGzfileOpenW (const char *filename, int compression) { + const char *path = NULL; + char mode[15]; + gzFile fd; + + snprintf(mode, sizeof(mode), "wb%d", compression); + if (!strcmp(filename, "-")) { + fd = gzdopen(dup(1), mode); + return((void *) fd); + } + + if (!xmlStrncasecmp(BAD_CAST 
filename, BAD_CAST "file://localhost/", 17)) +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[17]; +#else + path = &filename[16]; +#endif + else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) { +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &filename[8]; +#else + path = &filename[7]; +#endif + } else + path = filename; + + if (path == NULL) + return(NULL); + + fd = gzopen(path, mode); + return((void *) fd); +} + +/** + * xmlGzfileRead: + * @context: the I/O context + * @buffer: where to drop data + * @len: number of bytes to write + * + * Read @len bytes to @buffer from the compressed I/O channel. + * + * Returns the number of bytes written + */ +static int +xmlGzfileRead (void * context, char * buffer, int len) { + return(gzread((gzFile) context, &buffer[0], len)); +} + +/** + * xmlGzfileWrite: + * @context: the I/O context + * @buffer: where to drop data + * @len: number of bytes to write + * + * Write @len bytes from @buffer to the compressed I/O channel. + * + * Returns the number of bytes written + */ +static int +xmlGzfileWrite (void * context, const char * buffer, int len) { + return(gzwrite((gzFile) context, (char *) &buffer[0], len)); +} + +/** + * xmlGzfileClose: + * @context: the I/O context + * + * Close a compressed I/O channel + */ +static int +xmlGzfileClose (void * context) { + return ( ( gzclose((gzFile) context) == Z_OK ) ? 
0 : -1 ); +} +#endif /* HAVE_ZLIB_H */ + +#ifdef LIBXML_HTTP_ENABLED +/************************************************************************ + * * + * I/O for HTTP file accesses * + * * + ************************************************************************/ + +typedef struct xmlIOHTTPWriteCtxt_ +{ + int compression; + + char * uri; + + void * doc_buff; + +} xmlIOHTTPWriteCtxt, *xmlIOHTTPWriteCtxtPtr; + +#ifdef HAVE_ZLIB_H + +#define DFLT_WBITS ( -15 ) +#define DFLT_MEM_LVL ( 8 ) +#define GZ_MAGIC1 ( 0x1f ) +#define GZ_MAGIC2 ( 0x8b ) +#define LXML_ZLIB_OS_CODE ( 0x03 ) +#define INIT_HTTP_BUFF_SIZE ( 32768 ) +#define DFLT_ZLIB_RATIO ( 5 ) + +/* +** Data structure and functions to work with sending compressed data +** via HTTP. +*/ + +typedef struct xmlZMemBuff_ +{ + unsigned long size; + unsigned long crc; + + unsigned char * zbuff; + z_stream zctrl; + +} xmlZMemBuff, *xmlZMemBuffPtr; + +/** + * append_reverse_ulong + * @buff: Compressed memory buffer + * @data: Unsigned long to append + * + * Append a unsigned long in reverse byte order to the end of the + * memory buffer. + */ +static void +append_reverse_ulong( xmlZMemBuff * buff, unsigned long data ) { + + int idx; + + if ( buff == NULL ) + return; + + /* + ** This is plagiarized from putLong in gzio.c (zlib source) where + ** the number "4" is hardcoded. If zlib is ever patched to + ** support 64 bit file sizes, this code would need to be patched + ** as well. + */ + + for ( idx = 0; idx < 4; idx++ ) { + *buff->zctrl.next_out = ( data & 0xff ); + data >>= 8; + buff->zctrl.next_out++; + } + + return; +} + +/** + * + * xmlFreeZMemBuff + * @buff: The memory buffer context to clear + * + * Release all the resources associated with the compressed memory buffer. 
+ */ +static void +xmlFreeZMemBuff( xmlZMemBuffPtr buff ) { + + int z_err; + + if ( buff == NULL ) + return; + + xmlFree( buff->zbuff ); + z_err = deflateEnd( &buff->zctrl ); +#ifdef DEBUG_HTTP + if ( z_err != Z_OK ) + xmlGenericError( xmlGenericErrorContext, + "xmlFreeZMemBuff: Error releasing zlib context: %d\n", + z_err ); +#endif + + xmlFree( buff ); + return; +} + +/** + * xmlCreateZMemBuff + *@compression: Compression value to use + * + * Create a memory buffer to hold the compressed XML document. The + * compressed document in memory will end up being identical to what + * would be created if gzopen/gzwrite/gzclose were being used to + * write the document to disk. The code for the header/trailer data to + * the compression is plagiarized from the zlib source files. + */ +static void * +xmlCreateZMemBuff( int compression ) { + + int z_err; + int hdr_lgth; + xmlZMemBuffPtr buff = NULL; + + if ( ( compression < 1 ) || ( compression > 9 ) ) + return ( NULL ); + + /* Create the control and data areas */ + + buff = xmlMalloc( sizeof( xmlZMemBuff ) ); + if ( buff == NULL ) { + xmlGenericError( xmlGenericErrorContext, + "xmlCreateZMemBuff: %s\n", + "Failure allocating buffer context." ); + return ( NULL ); + } + + (void)memset( buff, 0, sizeof( xmlZMemBuff ) ); + buff->size = INIT_HTTP_BUFF_SIZE; + buff->zbuff = xmlMalloc( buff->size ); + if ( buff->zbuff == NULL ) { + xmlFreeZMemBuff( buff ); + xmlGenericError( xmlGenericErrorContext, + "xmlCreateZMemBuff: %s\n", + "Failure allocating data buffer." ); + return ( NULL ); + } + + z_err = deflateInit2( &buff->zctrl, compression, Z_DEFLATED, + DFLT_WBITS, DFLT_MEM_LVL, Z_DEFAULT_STRATEGY ); + if ( z_err != Z_OK ) { + xmlFreeZMemBuff( buff ); + buff = NULL; + xmlGenericError( xmlGenericErrorContext, + "xmlCreateZMemBuff: %s %d\n", + "Error initializing compression context. ZLIB error:", + z_err ); + return ( NULL ); + } + + /* Set the header data. 
The CRC will be needed for the trailer */ + buff->crc = crc32( 0L, Z_NULL, 0 ); + hdr_lgth = snprintf( (char *)buff->zbuff, buff->size, + "%c%c%c%c%c%c%c%c%c%c", + GZ_MAGIC1, GZ_MAGIC2, Z_DEFLATED, + 0, 0, 0, 0, 0, 0, LXML_ZLIB_OS_CODE ); + buff->zctrl.next_out = buff->zbuff + hdr_lgth; + buff->zctrl.avail_out = buff->size - hdr_lgth; + + return ( buff ); +} + +/** + * xmlZMemBuffExtend + * @buff: Buffer used to compress and consolidate data. + * @ext_amt: Number of bytes to extend the buffer. + * + * Extend the internal buffer used to store the compressed data by the + * specified amount. + * + * Returns 0 on success or -1 on failure to extend the buffer. On failure + * the original buffer still exists at the original size. + */ +static int +xmlZMemBuffExtend( xmlZMemBuffPtr buff, size_t ext_amt ) { + + int rc = -1; + size_t new_size; + size_t cur_used; + + unsigned char * tmp_ptr = NULL; + + if ( buff == NULL ) + return ( -1 ); + + else if ( ext_amt == 0 ) + return ( 0 ); + + cur_used = buff->zctrl.next_out - buff->zbuff; + new_size = buff->size + ext_amt; + +#ifdef DEBUG_HTTP + if ( cur_used > new_size ) + xmlGenericError( xmlGenericErrorContext, + "xmlZMemBuffExtend: %s\n%s %d bytes.\n", + "Buffer overwrite detected during compressed memory", + "buffer extension. 
Overflowed by", + (cur_used - new_size ) ); +#endif + + tmp_ptr = xmlRealloc( buff->zbuff, new_size ); + if ( tmp_ptr != NULL ) { + rc = 0; + buff->size = new_size; + buff->zbuff = tmp_ptr; + buff->zctrl.next_out = tmp_ptr + cur_used; + buff->zctrl.avail_out = new_size - cur_used; + } + else { + xmlGenericError( xmlGenericErrorContext, + "xmlZMemBuffExtend: %s %lu bytes.\n", + "Allocation failure extending output buffer to", + new_size ); + } + + return ( rc ); +} + +/** + * xmlZMemBuffAppend + * @buff: Buffer used to compress and consolidate data + * @src: Uncompressed source content to append to buffer + * @len: Length of source data to append to buffer + * + * Compress and append data to the internal buffer. The data buffer + * will be expanded if needed to store the additional data. + * + * Returns the number of bytes appended to the buffer or -1 on error. + */ +static int +xmlZMemBuffAppend( xmlZMemBuffPtr buff, const char * src, int len ) { + + int z_err; + size_t min_accept; + + if ( ( buff == NULL ) || ( src == NULL ) ) + return ( -1 ); + + buff->zctrl.avail_in = len; + buff->zctrl.next_in = (unsigned char *)src; + while ( buff->zctrl.avail_in > 0 ) { + /* + ** Extend the buffer prior to deflate call if a reasonable amount + ** of output buffer space is not available. + */ + min_accept = buff->zctrl.avail_in / DFLT_ZLIB_RATIO; + if ( buff->zctrl.avail_out <= min_accept ) { + if ( xmlZMemBuffExtend( buff, buff->size ) == -1 ) + return ( -1 ); + } + + z_err = deflate( &buff->zctrl, Z_NO_FLUSH ); + if ( z_err != Z_OK ) { + xmlGenericError( xmlGenericErrorContext, + "xmlZMemBuffAppend: %s %d %s - %d", + "Compression error while appending", + len, "bytes to buffer. 
ZLIB error", z_err ); + return ( -1 ); + } + } + + buff->crc = crc32( buff->crc, (unsigned char *)src, len ); + + return ( len ); +} + +/** + * xmlZMemBuffGetContent + * @buff: Compressed memory content buffer + * @data_ref: Pointer reference to point to compressed content + * + * Flushes the compression buffers, appends gzip file trailers and + * returns the compressed content and length of the compressed data. + * NOTE: The gzip trailer code here is plagiarized from zlib source. + * + * Returns the length of the compressed data or -1 on error. + */ +static int +xmlZMemBuffGetContent( xmlZMemBuffPtr buff, char ** data_ref ) { + + int zlgth = -1; + int z_err; + + if ( ( buff == NULL ) || ( data_ref == NULL ) ) + return ( -1 ); + + /* Need to loop until compression output buffers are flushed */ + + do + { + z_err = deflate( &buff->zctrl, Z_FINISH ); + if ( z_err == Z_OK ) { + /* In this case Z_OK means more buffer space needed */ + + if ( xmlZMemBuffExtend( buff, buff->size ) == -1 ) + return ( -1 ); + } + } + while ( z_err == Z_OK ); + + /* If the compression state is not Z_STREAM_END, some error occurred */ + + if ( z_err == Z_STREAM_END ) { + + /* Need to append the gzip data trailer */ + + if ( buff->zctrl.avail_out < ( 2 * sizeof( unsigned long ) ) ) { + if ( xmlZMemBuffExtend(buff, (2 * sizeof(unsigned long))) == -1 ) + return ( -1 ); + } + + /* + ** For whatever reason, the CRC and length data are pushed out + ** in reverse byte order. So a memcpy can't be used here. + */ + + append_reverse_ulong( buff, buff->crc ); + append_reverse_ulong( buff, buff->zctrl.total_in ); + + zlgth = buff->zctrl.next_out - buff->zbuff; + *data_ref = (char *)buff->zbuff; + } + + else + xmlGenericError( xmlGenericErrorContext, + "xmlZMemBuffGetContent: %s - %d\n", + "Error flushing zlib buffers. 
Error code", z_err ); + + return ( zlgth ); +} +#endif /* HAVE_ZLIB_H */ + +/** + * xmlFreeHTTPWriteCtxt + * @ctxt: Context to cleanup + * + * Free allocated memory and reclaim system resources. + * + * No return value. + */ +static void +xmlFreeHTTPWriteCtxt( xmlIOHTTPWriteCtxtPtr ctxt ) +{ + if ( ctxt->uri != NULL ) + xmlFree( ctxt->uri ); + + if ( ctxt->doc_buff != NULL ) { + +#ifdef HAVE_ZLIB_H + if ( ctxt->compression > 0 ) { + xmlFreeZMemBuff( ctxt->doc_buff ); + } + else +#endif + { + xmlOutputBufferClose( ctxt->doc_buff ); + } + } + + xmlFree( ctxt ); + return; +} + + +/** + * xmlIOHTTPMatch: + * @filename: the URI for matching + * + * check if the URI matches an HTTP one + * + * Returns 1 if matches, 0 otherwise + */ +int +xmlIOHTTPMatch (const char *filename) { + if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "http://", 7)) + return(1); + return(0); +} + +/** + * xmlIOHTTPOpen: + * @filename: the URI for matching + * + * open an HTTP I/O channel + * + * Returns an I/O context or NULL in case of error + */ +void * +xmlIOHTTPOpen (const char *filename) { + return(xmlNanoHTTPOpen(filename, NULL)); +} + +/** + * xmlIOHTTPOpenW: + * @post_uri: The destination URI for the document + * @compression: The compression desired for the document. + * + * Open a temporary buffer to collect the document for a subsequent HTTP POST + * request. Non-static as is called from the output buffer creation routine. + * + * Returns an I/O context or NULL in case of error. 
+ *
+ * NULL is returned when @post_uri is NULL, when the context or the copy
+ * of the URI cannot be allocated, or when the document buffer cannot be
+ * created.  With zlib support a @compression level of 1..9 selects the
+ * compressed memory buffer; any other value (or a build without zlib)
+ * selects a plain output buffer.
+ */
+
+void *
+xmlIOHTTPOpenW(const char *post_uri, int compression)
+{
+
+    xmlIOHTTPWriteCtxtPtr ctxt = NULL;
+
+    if (post_uri == NULL)
+        return (NULL);
+
+    ctxt = xmlMalloc(sizeof(xmlIOHTTPWriteCtxt));
+    if (ctxt == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+            "xmlIOHTTPOpenW:  Failed to create output HTTP context.\n");
+        return (NULL);
+    }
+
+    (void) memset(ctxt, 0, sizeof(xmlIOHTTPWriteCtxt)); /* compression = 0, uri = doc_buff = NULL */
+
+    ctxt->uri = (char *) xmlStrdup((const xmlChar *)post_uri);
+    if (ctxt->uri == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+            "xmlIOHTTPOpenW:  Failed to duplicate destination URI.\n");
+        xmlFreeHTTPWriteCtxt(ctxt);
+        return (NULL);
+    }
+
+    /*
+     * **  Since the document length is required for an HTTP post,
+     * **  need to put the document into a buffer.  A memory buffer
+     * **  is being used to avoid pushing the data to disk and back.
+     */
+
+#ifdef HAVE_ZLIB_H
+    if ((compression > 0) && (compression <= 9)) {
+
+        ctxt->compression = compression;
+        ctxt->doc_buff = xmlCreateZMemBuff(compression);
+    } else
+#endif
+    {
+        /* Any character conversions should have been done before this */
+
+        ctxt->doc_buff = xmlAllocOutputBuffer(NULL);
+    }
+
+    if (ctxt->doc_buff == NULL) { /* buffer creation failed: release uri and context */
+        xmlFreeHTTPWriteCtxt(ctxt);
+        ctxt = NULL;
+    }
+
+    return (ctxt);
+}
+
+/**
+ * xmlIOHTTPDfltOpenW
+ * @post_uri:  The destination URI for this document.
+ *
+ * Calls xmlIOHTTPOpenW with no compression to set up for a subsequent
+ * HTTP post command.  This function should generally not be used as
+ * the open callback is short circuited in xmlOutputBufferCreateFile.
+ *
+ * Returns a pointer to the new IO context, or NULL when xmlIOHTTPOpenW
+ * fails.
+ */
+static void *
+xmlIOHTTPDfltOpenW( const char * post_uri ) {
+    return ( xmlIOHTTPOpenW( post_uri, 0 ) );
+}
+
+/**
+ * xmlIOHTTPRead:
+ * @context:  the I/O context
+ * @buffer:  where to drop data
+ * @len:  number of bytes to read
+ *
+ * Read @len bytes to @buffer from the I/O channel.
+ *
+ * Returns whatever xmlNanoHTTPRead() returns for the request (presumably
+ * the number of bytes read, not written -- confirm against nanohttp)
+ */
+int
+xmlIOHTTPRead(void * context, char * buffer, int len) {
+    return(xmlNanoHTTPRead(context, &buffer[0], len));
+}
+
+/**
+ * xmlIOHTTPWrite
+ * @context:  previously opened writing context
+ * @buffer:  data to append to the in-memory document buffer
+ * @len:  bytes to output
+ *
+ * Collect data into the context's in-memory document buffer for later
+ * processing: the compressed buffer when the context was opened with
+ * compression (zlib builds only), the plain output buffer otherwise.
+ * Nothing is appended when @len is not positive.
+ *
+ * Returns the result of the append call (an error message is emitted
+ * when it is negative), @len unchanged when it is not positive, or -1
+ * when @context, its document buffer or @buffer is NULL.
+ */
+
+static int
+xmlIOHTTPWrite( void * context, const char * buffer, int len ) {
+
+    xmlIOHTTPWriteCtxtPtr ctxt = context;
+
+    if ( ( ctxt == NULL ) || ( ctxt->doc_buff == NULL ) || ( buffer == NULL ) )
+        return ( -1 );
+
+    if ( len > 0 ) {
+
+        /* Append to the kind of buffer selected by the open call */
+
+#ifdef HAVE_ZLIB_H
+        if ( ctxt->compression > 0 )
+            len = xmlZMemBuffAppend( ctxt->doc_buff, buffer, len );
+
+        else
+#endif
+            len = xmlOutputBufferWrite( ctxt->doc_buff, len, buffer );
+
+        if ( len < 0 ) {
+            xmlGenericError( xmlGenericErrorContext,
+                        "xmlIOHTTPWrite:  %s\n%s '%s'.\n",
+                        "Error appending to internal buffer.",
+                        "Error sending document to URI",
+                        ctxt->uri );
+        }
+    }
+
+    return ( len );
+}
+
+
+/**
+ * xmlIOHTTPClose:
+ * @context:  the I/O context
+ *
+ * Close an HTTP I/O channel
+ *
+ * Returns 0
+ */
+int
+xmlIOHTTPClose (void * context) {
+    xmlNanoHTTPClose(context);
+    return 0;
+}
+
+/**
+ * xmlIOHTTPCloseWrite
+ * @context:  The I/O context
+ * @http_mthd:  The HTTP method to be used when sending the data
+ *
+ * Close the transmit HTTP I/O channel and actually send the data.
+ */ +static int +xmlIOHTTPCloseWrite( void * context, const char * http_mthd ) { + + int close_rc = -1; + int http_rtn = 0; + int content_lgth = 0; + xmlIOHTTPWriteCtxtPtr ctxt = context; + + char * http_content = NULL; + char * content_encoding = NULL; + char * content_type = (char *) "text/xml"; + void * http_ctxt = NULL; + + if ( ( ctxt == NULL ) || ( http_mthd == NULL ) ) + return ( -1 ); + + /* Retrieve the content from the appropriate buffer */ + +#ifdef HAVE_ZLIB_H + + if ( ctxt->compression > 0 ) { + content_lgth = xmlZMemBuffGetContent( ctxt->doc_buff, &http_content ); + content_encoding = (char *) "Content-Encoding: gzip"; + } + else +#endif + { + /* Pull the data out of the memory output buffer */ + + xmlOutputBufferPtr dctxt = ctxt->doc_buff; + http_content = (char *)dctxt->buffer->content; + content_lgth = dctxt->buffer->use; + } + + if ( http_content == NULL ) { + xmlGenericError( xmlGenericErrorContext, + "xmlIOHTTPCloseWrite: %s '%s' %s '%s'.\n", + "Error retrieving content.\nUnable to", + http_mthd, "data to URI", ctxt->uri ); + } + + else { + + http_ctxt = xmlNanoHTTPMethod( ctxt->uri, http_mthd, http_content, + &content_type, content_encoding, + content_lgth ); + + if ( http_ctxt != NULL ) { +#ifdef DEBUG_HTTP + /* If testing/debugging - dump reply with request content */ + + FILE * tst_file = NULL; + char buffer[ 4096 ]; + char * dump_name = NULL; + int avail; + + xmlGenericError( xmlGenericErrorContext, + "xmlNanoHTTPCloseWrite: HTTP %s to\n%s returned %d.\n", + http_mthd, ctxt->uri, + xmlNanoHTTPReturnCode( http_ctxt ) ); + + /* + ** Since either content or reply may be gzipped, + ** dump them to separate files instead of the + ** standard error context. 
+ */ + + dump_name = tempnam( NULL, "lxml" ); + if ( dump_name != NULL ) { + (void)snprintf( buffer, sizeof(buffer), "%s.content", dump_name ); + + tst_file = fopen( buffer, "w" ); + if ( tst_file != NULL ) { + xmlGenericError( xmlGenericErrorContext, + "Transmitted content saved in file: %s\n", buffer ); + + fwrite( http_content, sizeof( char ), + content_lgth, tst_file ); + fclose( tst_file ); + } + + (void)snprintf( buffer, sizeof(buffer), "%s.reply", dump_name ); + tst_file = fopen( buffer, "w" ); + if ( tst_file != NULL ) { + xmlGenericError( xmlGenericErrorContext, + "Reply content saved in file: %s\n", buffer ); + + + while ( (avail = xmlNanoHTTPRead( http_ctxt, + buffer, sizeof( buffer ) )) > 0 ) { + + fwrite( buffer, sizeof( char ), avail, tst_file ); + } + + fclose( tst_file ); + } + + free( dump_name ); + } +#endif /* DEBUG_HTTP */ + + http_rtn = xmlNanoHTTPReturnCode( http_ctxt ); + if ( ( http_rtn >= 200 ) && ( http_rtn < 300 ) ) + close_rc = 0; + else + xmlGenericError( xmlGenericErrorContext, + "xmlIOHTTPCloseWrite: HTTP '%s' of %d %s\n'%s' %s %d\n", + http_mthd, content_lgth, + "bytes to URI", ctxt->uri, + "failed. HTTP return code:", http_rtn ); + + xmlNanoHTTPClose( http_ctxt ); + xmlFree( content_type ); + } + } + + /* Final cleanups */ + + xmlFreeHTTPWriteCtxt( ctxt ); + + return ( close_rc ); +} + +/** + * xmlIOHTTPClosePut + * + * @context: The I/O context + * + * Close the transmit HTTP I/O channel and actually send data using a PUT + * HTTP method. + */ +static int +xmlIOHTTPClosePut( void * ctxt ) { + return ( xmlIOHTTPCloseWrite( ctxt, "PUT" ) ); +} + + +/** + * xmlIOHTTPClosePost + * + * @context: The I/O context + * + * Close the transmit HTTP I/O channel and actually send data using a POST + * HTTP method. 
+ *
+ * Returns the result of xmlIOHTTPCloseWrite() for the "POST" method.
+ */
+static int
+xmlIOHTTPClosePost( void * ctxt ) {
+    return ( xmlIOHTTPCloseWrite( ctxt, "POST" ) );
+}
+
+#endif /* LIBXML_HTTP_ENABLED */
+
+#ifdef LIBXML_FTP_ENABLED
+/************************************************************************
+ *                                                                      *
+ *                      I/O for FTP file accesses                       *
+ *                                                                      *
+ ************************************************************************/
+/**
+ * xmlIOFTPMatch:
+ * @filename:  the URI for matching
+ *
+ * check if the URI matches an FTP one, i.e. starts with "ftp://"
+ * (compared case-insensitively)
+ *
+ * Returns 1 if matches, 0 otherwise
+ */
+int
+xmlIOFTPMatch (const char *filename) {
+    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "ftp://", 6))
+        return(1);
+    return(0);
+}
+
+/**
+ * xmlIOFTPOpen:
+ * @filename:  the URI for matching
+ *
+ * open an FTP I/O channel
+ *
+ * Returns an I/O context or NULL in case of error
+ */
+void *
+xmlIOFTPOpen (const char *filename) {
+    return(xmlNanoFTPOpen(filename));
+}
+
+/**
+ * xmlIOFTPRead:
+ * @context:  the I/O context
+ * @buffer:  where to drop data
+ * @len:  number of bytes to read
+ *
+ * Read @len bytes to @buffer from the I/O channel.
+ *
+ * Returns whatever xmlNanoFTPRead() returns (presumably the number of
+ * bytes read, not written -- confirm against nanoftp)
+ */
+int
+xmlIOFTPRead(void * context, char * buffer, int len) {
+    return(xmlNanoFTPRead(context, &buffer[0], len));
+}
+
+/**
+ * xmlIOFTPClose:
+ * @context:  the I/O context
+ *
+ * Close an FTP I/O channel
+ *
+ * Returns the result of xmlNanoFTPClose() (the previous text said 0;
+ * the code forwards the callee's return value)
+ */
+int
+xmlIOFTPClose (void * context) {
+    return ( xmlNanoFTPClose(context) );
+}
+#endif /* LIBXML_FTP_ENABLED */
+
+
+/**
+ * xmlRegisterInputCallbacks:
+ * @matchFunc:  the xmlInputMatchCallback
+ * @openFunc:  the xmlInputOpenCallback
+ * @readFunc:  the xmlInputReadCallback
+ * @closeFunc:  the xmlInputCloseCallback
+ *
+ * Register a new set of I/O callback for handling parser input.
+ * The set is stored in the next free slot of the input callback table,
+ * which holds at most MAX_INPUT_CALLBACK entries.
+ *
+ * Returns the registered handler number or -1 in case of error
+ * (table full)
+ */
+int
+xmlRegisterInputCallbacks(xmlInputMatchCallback matchFunc,
+        xmlInputOpenCallback openFunc, xmlInputReadCallback readFunc,
+        xmlInputCloseCallback closeFunc) {
+    if (xmlInputCallbackNr >= MAX_INPUT_CALLBACK) {
+        return(-1);
+    }
+    xmlInputCallbackTable[xmlInputCallbackNr].matchcallback = matchFunc;
+    xmlInputCallbackTable[xmlInputCallbackNr].opencallback = openFunc;
+    xmlInputCallbackTable[xmlInputCallbackNr].readcallback = readFunc;
+    xmlInputCallbackTable[xmlInputCallbackNr].closecallback = closeFunc;
+    return(xmlInputCallbackNr++);
+}
+
+/**
+ * xmlRegisterOutputCallbacks:
+ * @matchFunc:  the xmlOutputMatchCallback
+ * @openFunc:  the xmlOutputOpenCallback
+ * @writeFunc:  the xmlOutputWriteCallback
+ * @closeFunc:  the xmlOutputCloseCallback
+ *
+ * Register a new set of I/O callback for handling output.
+ * The set is stored in the next free slot of the output callback table,
+ * which holds at most MAX_OUTPUT_CALLBACK entries.
+ *
+ * Returns the registered handler number or -1 in case of error
+ * (table full)
+ */
+int
+xmlRegisterOutputCallbacks(xmlOutputMatchCallback matchFunc,
+        xmlOutputOpenCallback openFunc, xmlOutputWriteCallback writeFunc,
+        xmlOutputCloseCallback closeFunc) {
+    /* bound by the output table's own size, not the input table's */
+    if (xmlOutputCallbackNr >= MAX_OUTPUT_CALLBACK) {
+        return(-1);
+    }
+    xmlOutputCallbackTable[xmlOutputCallbackNr].matchcallback = matchFunc;
+    xmlOutputCallbackTable[xmlOutputCallbackNr].opencallback = openFunc;
+    xmlOutputCallbackTable[xmlOutputCallbackNr].writecallback = writeFunc;
+    xmlOutputCallbackTable[xmlOutputCallbackNr].closecallback = closeFunc;
+    return(xmlOutputCallbackNr++);
+}
+
+/**
+ * xmlRegisterDefaultInputCallbacks:
+ *
+ * Registers the default compiled-in I/O handlers.
+ */ +void +xmlRegisterDefaultInputCallbacks +(void) { + if (xmlInputCallbackInitialized) + return; + + xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen, + xmlFileRead, xmlFileClose); +#ifdef HAVE_ZLIB_H + xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen, + xmlGzfileRead, xmlGzfileClose); +#endif /* HAVE_ZLIB_H */ + +#ifdef LIBXML_HTTP_ENABLED + xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlIOHTTPOpen, + xmlIOHTTPRead, xmlIOHTTPClose); +#endif /* LIBXML_HTTP_ENABLED */ + +#ifdef LIBXML_FTP_ENABLED + xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen, + xmlIOFTPRead, xmlIOFTPClose); +#endif /* LIBXML_FTP_ENABLED */ + xmlInputCallbackInitialized = 1; +} + +/** + * xmlRegisterDefaultOutputCallbacks: + * + * Registers the default compiled-in I/O handlers. + */ +void +xmlRegisterDefaultOutputCallbacks +(void) { + if (xmlOutputCallbackInitialized) + return; + + xmlRegisterOutputCallbacks(xmlFileMatch, xmlFileOpenW, + xmlFileWrite, xmlFileClose); + +#ifdef LIBXML_HTTP_ENABLED + xmlRegisterOutputCallbacks(xmlIOHTTPMatch, xmlIOHTTPDfltOpenW, + xmlIOHTTPWrite, xmlIOHTTPClosePut); +#endif + +/********************************* + No way a-priori to distinguish between gzipped files from + uncompressed ones except opening if existing then closing + and saving with same compression ratio ... a pain. + +#ifdef HAVE_ZLIB_H + xmlRegisterOutputCallbacks(xmlGzfileMatch, xmlGzfileOpen, + xmlGzfileWrite, xmlGzfileClose); +#endif + + Nor FTP PUT .... +#ifdef LIBXML_FTP_ENABLED + xmlRegisterOutputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen, + xmlIOFTPWrite, xmlIOFTPClose); +#endif + **********************************/ + xmlOutputCallbackInitialized = 1; +} + +#ifdef LIBXML_HTTP_ENABLED +/** + * xmlRegisterHTTPPostCallbacks: + * + * By default, libxml submits HTTP output requests using the "PUT" method. + * Calling this method changes the HTTP output method to use the "POST" + * method instead. 
+ * + */ +void +xmlRegisterHTTPPostCallbacks( void ) { + + /* Register defaults if not done previously */ + + if ( xmlOutputCallbackInitialized == 0 ) + xmlRegisterDefaultOutputCallbacks( ); + + xmlRegisterOutputCallbacks(xmlIOHTTPMatch, xmlIOHTTPDfltOpenW, + xmlIOHTTPWrite, xmlIOHTTPClosePost); + return; +} +#endif + +/** + * xmlAllocParserInputBuffer: + * @enc: the charset encoding if known + * + * Create a buffered parser input for progressive parsing + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlAllocParserInputBuffer(xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAllocParserInputBuffer : out of memory!\n"); + return(NULL); + } + memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer)); + ret->buffer = xmlBufferCreate(); + if (ret->buffer == NULL) { + xmlFree(ret); + return(NULL); + } + ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT; + ret->encoder = xmlGetCharEncodingHandler(enc); + if (ret->encoder != NULL) + ret->raw = xmlBufferCreate(); + else + ret->raw = NULL; + ret->readcallback = NULL; + ret->closecallback = NULL; + ret->context = NULL; + + return(ret); +} + +/** + * xmlAllocOutputBuffer: + * @encoder: the encoding converter or NULL + * + * Create a buffered parser output + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlAllocOutputBuffer(xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + ret = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAllocOutputBuffer : out of memory!\n"); + return(NULL); + } + memset(ret, 0, (size_t) sizeof(xmlOutputBuffer)); + ret->buffer = xmlBufferCreate(); + if (ret->buffer == NULL) { + xmlFree(ret); + return(NULL); + } + ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT; + ret->encoder = encoder; + if (encoder != NULL) { + 
ret->conv = xmlBufferCreateSize(4000); + /* + * This call is designed to initiate the encoder state + */ + xmlCharEncOutFunc(encoder, ret->conv, NULL); + } else + ret->conv = NULL; + ret->writecallback = NULL; + ret->closecallback = NULL; + ret->context = NULL; + ret->written = 0; + + return(ret); +} + +/** + * xmlFreeParserInputBuffer: + * @in: a buffered parser input + * + * Free up the memory used by a buffered parser input + */ +void +xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) { + if (in->raw) { + xmlBufferFree(in->raw); + in->raw = NULL; + } + if (in->encoder != NULL) { + xmlCharEncCloseFunc(in->encoder); + } + if (in->closecallback != NULL) { + in->closecallback(in->context); + } + if (in->buffer != NULL) { + xmlBufferFree(in->buffer); + in->buffer = NULL; + } + + xmlFree(in); +} + +/** + * xmlOutputBufferClose: + * @out: a buffered output + * + * flushes and close the output I/O channel + * and free up all the associated resources + * + * Returns the number of byte written or -1 in case of error. + */ +int +xmlOutputBufferClose(xmlOutputBufferPtr out) { + int written; + int err_rc = 0; + + if (out == NULL) + return(-1); + if (out->writecallback != NULL) + xmlOutputBufferFlush(out); + if (out->closecallback != NULL) { + err_rc = out->closecallback(out->context); + } + written = out->written; + if (out->conv) { + xmlBufferFree(out->conv); + out->conv = NULL; + } + if (out->encoder != NULL) { + xmlCharEncCloseFunc(out->encoder); + } + if (out->buffer != NULL) { + xmlBufferFree(out->buffer); + out->buffer = NULL; + } + + xmlFree(out); + return( ( err_rc == 0 ) ? 
written : err_rc ); +} + +/** + * xmlParserInputBufferCreateFname: + * @URI: a C string containing the URI or filename + * @enc: the charset encoding if known + * + * Returns the new parser input or NULL + */ +/** + * xmlParserInputBufferCreateFilename: + * @URI: a C string containing the URI or filename + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing of a file + * If filename is "-' then we use stdin as the input. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. + * Do an encoding check if enc == XML_CHAR_ENCODING_NONE + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateFilename +(const char *URI, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + int i = 0; + void *context = NULL; + char *unescaped; + char *normalized; + + if (xmlInputCallbackInitialized == 0) + xmlRegisterDefaultInputCallbacks(); + + if (URI == NULL) return(NULL); + normalized = (char *) xmlNormalizeWindowsPath((const xmlChar *)URI); + if (normalized == NULL) return(NULL); + +#ifdef LIBXML_CATALOG_ENABLED +#endif + + /* + * Try to find one of the input accept method accepting that scheme + * Go in reverse to give precedence to user defined handlers. 
+ * try with an unescaped version of the URI + */ + unescaped = xmlURIUnescapeString((char *) normalized, 0, NULL); + if (unescaped != NULL) { + for (i = xmlInputCallbackNr - 1;i >= 0;i--) { + if ((xmlInputCallbackTable[i].matchcallback != NULL) && + (xmlInputCallbackTable[i].matchcallback(unescaped) != 0)) { + context = xmlInputCallbackTable[i].opencallback(unescaped); + if (context != NULL) + break; + } + } + xmlFree(unescaped); + } + + /* + * If this failed try with a non-escaped URI this may be a strange + * filename + */ + if (context == NULL) { + for (i = xmlInputCallbackNr - 1;i >= 0;i--) { + if ((xmlInputCallbackTable[i].matchcallback != NULL) && + (xmlInputCallbackTable[i].matchcallback(URI) != 0)) { + context = xmlInputCallbackTable[i].opencallback(normalized); + if (context != NULL) + break; + } + } + } + xmlFree(normalized); + if (context == NULL) { + return(NULL); + } + + /* + * Allocate the Input buffer front-end. + */ + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = context; + ret->readcallback = xmlInputCallbackTable[i].readcallback; + ret->closecallback = xmlInputCallbackTable[i].closecallback; + } + return(ret); +} + +/** + * xmlOutputBufferCreateFilename: + * @URI: a C string containing the URI or filename + * @encoder: the encoding converter or NULL + * @compression: the compression ration (0 none, 9 max). + * + * Create a buffered output for the progressive saving of a file + * If filename is "-' then we use stdout as the output. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. + * TODO: currently if compression is set, the library only support + * writing to a local file. 
+ * + * Returns the new output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateFilename(const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression) { + xmlOutputBufferPtr ret; + int i = 0; + void *context = NULL; + char *unescaped; + char *normalized; + + int is_http_uri = 0; /* Can't change if HTTP disabled */ + + if (xmlOutputCallbackInitialized == 0) + xmlRegisterDefaultOutputCallbacks(); + + if (URI == NULL) return(NULL); + normalized = (char *) xmlNormalizeWindowsPath((const xmlChar *)URI); + if (normalized == NULL) return(NULL); + +#ifdef LIBXML_HTTP_ENABLED + /* Need to prevent HTTP URI's from falling into zlib short circuit */ + + is_http_uri = xmlIOHTTPMatch( normalized ); +#endif + + + /* + * Try to find one of the output accept method accepting that scheme + * Go in reverse to give precedence to user defined handlers. + * try with an unescaped version of the URI + */ + unescaped = xmlURIUnescapeString(normalized, 0, NULL); + if (unescaped != NULL) { +#ifdef HAVE_ZLIB_H + if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) { + context = xmlGzfileOpenW(unescaped, compression); + if (context != NULL) { + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = context; + ret->writecallback = xmlGzfileWrite; + ret->closecallback = xmlGzfileClose; + } + xmlFree(unescaped); + xmlFree(normalized); + return(ret); + } + } +#endif + for (i = xmlOutputCallbackNr - 1;i >= 0;i--) { + if ((xmlOutputCallbackTable[i].matchcallback != NULL) && + (xmlOutputCallbackTable[i].matchcallback(unescaped) != 0)) { +#if defined(LIBXML_HTTP_ENABLED) && defined(HAVE_ZLIB_H) + /* Need to pass compression parameter into HTTP open calls */ + if (xmlOutputCallbackTable[i].matchcallback == xmlIOHTTPMatch) + context = xmlIOHTTPOpenW(unescaped, compression); + else +#endif + context = xmlOutputCallbackTable[i].opencallback(unescaped); + if (context != NULL) + break; + } + } + xmlFree(unescaped); + } + + /* + * If this failed try with a 
non-escaped URI this may be a strange + * filename + */ + if (context == NULL) { +#ifdef HAVE_ZLIB_H + if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) { + context = xmlGzfileOpenW(normalized, compression); + if (context != NULL) { + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = context; + ret->writecallback = xmlGzfileWrite; + ret->closecallback = xmlGzfileClose; + } + xmlFree(normalized); + return(ret); + } + } +#endif + for (i = xmlOutputCallbackNr - 1;i >= 0;i--) { + if ((xmlOutputCallbackTable[i].matchcallback != NULL) && + (xmlOutputCallbackTable[i].matchcallback(normalized) != 0)) { +#if defined(LIBXML_HTTP_ENABLED) && defined(HAVE_ZLIB_H) + /* Need to pass compression parameter into HTTP open calls */ + if (xmlOutputCallbackTable[i].matchcallback == xmlIOHTTPMatch) + context = xmlIOHTTPOpenW(URI, compression); + else +#endif + context = xmlOutputCallbackTable[i].opencallback(URI); + if (context != NULL) + break; + } + } + } + xmlFree(normalized); + + if (context == NULL) { + return(NULL); + } + + /* + * Allocate the Output buffer front-end. 
+ */ + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = context; + ret->writecallback = xmlOutputCallbackTable[i].writecallback; + ret->closecallback = xmlOutputCallbackTable[i].closecallback; + } + return(ret); +} + +/** + * xmlParserInputBufferCreateFile: + * @file: a FILE* + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing of a FILE * + * buffered C I/O + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (xmlInputCallbackInitialized == 0) + xmlRegisterDefaultInputCallbacks(); + + if (file == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = file; + ret->readcallback = xmlFileRead; + ret->closecallback = xmlFileFlush; + } + + return(ret); +} + +/** + * xmlOutputBufferCreateFile: + * @file: a FILE* + * @encoder: the encoding converter or NULL + * + * Create a buffered output for the progressive saving to a FILE * + * buffered C I/O + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateFile(FILE *file, xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + if (xmlOutputCallbackInitialized == 0) + xmlRegisterDefaultOutputCallbacks(); + + if (file == NULL) return(NULL); + + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = file; + ret->writecallback = xmlFileWrite; + ret->closecallback = xmlFileFlush; + } + + return(ret); +} + +/** + * xmlParserInputBufferCreateFd: + * @fd: a file descriptor number + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from a file descriptor + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (fd < 0) return(NULL); 
+ + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) (long) fd; + ret->readcallback = xmlFdRead; + ret->closecallback = xmlFdClose; + } + + return(ret); +} + +/** + * xmlParserInputBufferCreateMem: + * @mem: the memory input + * @size: the length of the memory block + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from a memory area. + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (size <= 0) return(NULL); + if (mem == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) mem; + ret->readcallback = (xmlInputReadCallback) xmlNop; + ret->closecallback = NULL; + xmlBufferAdd(ret->buffer, (const xmlChar *) mem, size); + } + + return(ret); +} + +/** + * xmlOutputBufferCreateFd: + * @fd: a file descriptor number + * @encoder: the encoding converter or NULL + * + * Create a buffered output for the progressive saving + * to a file descriptor + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateFd(int fd, xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + if (fd < 0) return(NULL); + + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = (void *) (long) fd; + ret->writecallback = xmlFdWrite; + ret->closecallback = NULL; + } + + return(ret); +} + +/** + * xmlParserInputBufferCreateIO: + * @ioread: an I/O read function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from an I/O handler + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateIO(xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, void 
*ioctx, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (ioread == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) ioctx; + ret->readcallback = ioread; + ret->closecallback = ioclose; + } + + return(ret); +} + +/** + * xmlOutputBufferCreateIO: + * @iowrite: an I/O write function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @encoder: the charset encoding if known + * + * Create a buffered output for the progressive saving + * to an I/O handler + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateIO(xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, void *ioctx, + xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + if (iowrite == NULL) return(NULL); + + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = (void *) ioctx; + ret->writecallback = iowrite; + ret->closecallback = ioclose; + } + + return(ret); +} + +/** + * xmlParserInputBufferPush: + * @in: a buffered parser input + * @len: the size in bytes of the array. + * @buf: an char array + * + * Push the content of the arry in the input buffer + * This routine handle the I18N transcoding to internal UTF-8 + * This is used when operating the parser in progressive (push) mode. + * + * Returns the number of chars read and stored in the buffer, or -1 + * in case of error. + */ +int +xmlParserInputBufferPush(xmlParserInputBufferPtr in, + int len, const char *buf) { + int nbchars = 0; + + if (len < 0) return(0); + if (in->encoder != NULL) { + /* + * Store the data in the incoming raw buffer + */ + if (in->raw == NULL) { + in->raw = xmlBufferCreate(); + } + xmlBufferAdd(in->raw, (const xmlChar *) buf, len); + + /* + * convert as much as possible to the parser reading buffer. 
+ */ + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferPush: encoder error\n"); + return(-1); + } + } else { + nbchars = len; + xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars); + } +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: pushed %d chars, buffer %d/%d\n", + nbchars, in->buffer->use, in->buffer->size); +#endif + return(nbchars); +} + +/** + * endOfInput: + * + * When reading from an Input channel indicated end of file or error + * don't reread from it again. + */ +static int +endOfInput (void * context ATTRIBUTE_UNUSED, + char * buffer ATTRIBUTE_UNUSED, + int len ATTRIBUTE_UNUSED) { + return(0); +} + +/** + * xmlParserInputBufferGrow: + * @in: a buffered parser input + * @len: indicative value of the amount of chars to read + * + * Grow up the content of the input buffer, the old data are preserved + * This routine handle the I18N transcoding to internal UTF-8 + * This routine is used when operating the parser in normal (pull) mode + * + * TODO: one should be able to remove one extra copy by copying directly + * onto in->buffer or in->raw + * + * Returns the number of chars read and stored in the buffer, or -1 + * in case of error. 
+ */ +int +xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) { + char *buffer = NULL; + int res = 0; + int nbchars = 0; + int buffree; + unsigned int needSize; + + if ((len <= MINLEN) && (len != 4)) + len = MINLEN; + buffree = in->buffer->size - in->buffer->use; + if (buffree <= 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow : buffer full !\n"); + return(0); + } + if (len > buffree) + len = buffree; + + needSize = in->buffer->use + len + 1; + if (needSize > in->buffer->size){ + if (!xmlBufferResize(in->buffer, needSize)){ + xmlGenericError(xmlGenericErrorContext, + "xmlBufferAdd : out of memory!\n"); + return(0); + } + } + buffer = (char *)&in->buffer->content[in->buffer->use]; + + /* + * Call the read method for this I/O type. + */ + if (in->readcallback != NULL) { + res = in->readcallback(in->context, &buffer[0], len); + if (res <= 0) + in->readcallback = endOfInput; + } else { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow : no input !\n"); + return(-1); + } + if (res < 0) { + return(-1); + } + len = res; + if (in->encoder != NULL) { + /* + * Store the data in the incoming raw buffer + */ + if (in->raw == NULL) { + in->raw = xmlBufferCreate(); + } + xmlBufferAdd(in->raw, (const xmlChar *) buffer, len); + + /* + * convert as much as possible to the parser reading buffer. 
+ */ + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow: encoder error\n"); + return(-1); + } + } else { + nbchars = len; + in->buffer->use += nbchars; + buffer[nbchars] = 0; + } +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: read %d chars, buffer %d/%d\n", + nbchars, in->buffer->use, in->buffer->size); +#endif + return(nbchars); +} + +/** + * xmlParserInputBufferRead: + * @in: a buffered parser input + * @len: indicative value of the amount of chars to read + * + * Refresh the content of the input buffer, the old data are considered + * consumed + * This routine handle the I18N transcoding to internal UTF-8 + * + * Returns the number of chars read and stored in the buffer, or -1 + * in case of error. + */ +int +xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) { + /* xmlBufferEmpty(in->buffer); */ + if (in->readcallback != NULL) + return(xmlParserInputBufferGrow(in, len)); + else + return(-1); +} + +/** + * xmlOutputBufferWrite: + * @out: a buffered parser output + * @len: the size in bytes of the array. + * @buf: an char array + * + * Write the content of the array in the output I/O buffer + * This routine handle the I18N transcoding from internal UTF-8 + * The buffer is lossless, i.e. will store in case of partial + * or delayed writes. + * + * Returns the number of chars immediately written, or -1 + * in case of error. + */ +int +xmlOutputBufferWrite(xmlOutputBufferPtr out, int len, const char *buf) { + int nbchars = 0; /* number of chars to output to I/O */ + int ret; /* return from function call */ + int written = 0; /* number of char written to I/O so far */ + int chunk; /* number of byte curreent processed from buf */ + + if (len < 0) return(0); + + do { + chunk = len; + if (chunk > 4 * MINLEN) + chunk = 4 * MINLEN; + + /* + * first handle encoding stuff. 
+ */ + if (out->encoder != NULL) { + /* + * Store the data in the incoming raw buffer + */ + if (out->conv == NULL) { + out->conv = xmlBufferCreate(); + } + xmlBufferAdd(out->buffer, (const xmlChar *) buf, chunk); + + if ((out->buffer->use < MINLEN) && (chunk == len)) + goto done; + + /* + * convert as much as possible to the parser reading buffer. + */ + ret = xmlCharEncOutFunc(out->encoder, out->conv, out->buffer); + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlOutputBufferWrite: encoder error\n"); + return(-1); + } + nbchars = out->conv->use; + } else { + xmlBufferAdd(out->buffer, (const xmlChar *) buf, chunk); + nbchars = out->buffer->use; + } + buf += chunk; + len -= chunk; + + if ((nbchars < MINLEN) && (len <= 0)) + goto done; + + if (out->writecallback) { + /* + * second write the stuff to the I/O channel + */ + if (out->encoder != NULL) { + ret = out->writecallback(out->context, + (const char *)out->conv->content, nbchars); + if (ret >= 0) + xmlBufferShrink(out->conv, ret); + } else { + ret = out->writecallback(out->context, + (const char *)out->buffer->content, nbchars); + if (ret >= 0) + xmlBufferShrink(out->buffer, ret); + } + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "I/O: error %d writing %d bytes\n", ret, nbchars); + return(ret); + } + out->written += ret; + } + written += nbchars; + } while (len > 0); + +done: +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: wrote %d chars\n", written); +#endif + return(written); +} + +/** + * xmlOutputBufferWriteString: + * @out: a buffered parser output + * @str: a zero terminated C string + * + * Write the content of the string in the output I/O buffer + * This routine handle the I18N transcoding from internal UTF-8 + * The buffer is lossless, i.e. will store in case of partial + * or delayed writes. + * + * Returns the number of chars immediately written, or -1 + * in case of error. 
+ */ +int +xmlOutputBufferWriteString(xmlOutputBufferPtr out, const char *str) { + int len; + + if (str == NULL) + return(-1); + len = strlen(str); + + if (len > 0) + return(xmlOutputBufferWrite(out, len, str)); + return(len); +} + +/** + * xmlOutputBufferFlush: + * @out: a buffered output + * + * flushes the output I/O channel + * + * Returns the number of byte written or -1 in case of error. + */ +int +xmlOutputBufferFlush(xmlOutputBufferPtr out) { + int nbchars = 0, ret = 0; + + /* + * first handle encoding stuff. + */ + if ((out->conv != NULL) && (out->encoder != NULL)) { + /* + * convert as much as possible to the parser reading buffer. + */ + nbchars = xmlCharEncOutFunc(out->encoder, out->conv, out->buffer); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlOutputBufferFlush: encoder error\n"); + return(-1); + } + } + + /* + * second flush the stuff to the I/O channel + */ + if ((out->conv != NULL) && (out->encoder != NULL) && + (out->writecallback != NULL)) { + ret = out->writecallback(out->context, + (const char *)out->conv->content, out->conv->use); + if (ret >= 0) + xmlBufferShrink(out->conv, ret); + } else if (out->writecallback != NULL) { + ret = out->writecallback(out->context, + (const char *)out->buffer->content, out->buffer->use); + if (ret >= 0) + xmlBufferShrink(out->buffer, ret); + } + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "I/O: error %d flushing %d bytes\n", ret, nbchars); + return(ret); + } + out->written += ret; + +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: flushed %d chars\n", ret); +#endif + return(ret); +} + +/** + * xmlParserGetDirectory: + * @filename: the path to a file + * + * lookup the directory for that file + * + * Returns a new allocated string containing the directory, or NULL. + */ +char * +xmlParserGetDirectory(const char *filename) { + char *ret = NULL; + char dir[1024]; + char *cur; + char sep = '/'; + +#ifdef _WIN32_WCE /* easy way by now ... 
wince does not have dirs! */ + return NULL; +#endif + + if (xmlInputCallbackInitialized == 0) + xmlRegisterDefaultInputCallbacks(); + + if (filename == NULL) return(NULL); +#if defined(WIN32) && !defined(__CYGWIN__) + sep = '\\'; +#endif + + strncpy(dir, filename, 1023); + dir[1023] = 0; + cur = &dir[strlen(dir)]; + while (cur > dir) { + if (*cur == sep) break; + cur --; + } + if (*cur == sep) { + if (cur == dir) dir[1] = 0; + else *cur = 0; + ret = xmlMemStrdup(dir); + } else { + if (getcwd(dir, 1024) != NULL) { + dir[1023] = 0; + ret = xmlMemStrdup(dir); + } + } + return(ret); +} + +/**************************************************************** + * * + * External entities loading * + * * + ****************************************************************/ + +#ifdef LIBXML_CATALOG_ENABLED +static int xmlSysIDExists(const char *URL) { +#ifdef HAVE_STAT + int ret; + struct stat info; + const char *path; + + if (URL == NULL) + return(0); + + if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17)) +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &URL[17]; +#else + path = &URL[16]; +#endif + else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) { +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &URL[8]; +#else + path = &URL[7]; +#endif + } else + path = URL; + ret = stat(path, &info); + if (ret == 0) + return(1); +#endif + return(0); +} +#endif + +/** + * xmlDefaultExternalEntityLoader: + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @ctxt: the context in which the entity is called or NULL + * + * By default we don't load external entitites, yet. + * + * Returns a new allocated xmlParserInputPtr, or NULL. 
+ */ +static +xmlParserInputPtr +xmlDefaultExternalEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + xmlParserInputPtr ret = NULL; + xmlChar *resource = NULL; +#ifdef LIBXML_CATALOG_ENABLED + xmlCatalogAllow pref; +#endif + +#ifdef DEBUG_EXTERNAL_ENTITIES + xmlGenericError(xmlGenericErrorContext, + "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL); +#endif +#ifdef LIBXML_CATALOG_ENABLED + /* + * If the resource doesn't exists as a file, + * try to load it from the resource pointed in the catalogs + */ + pref = xmlCatalogGetDefaults(); + + if ((pref != XML_CATA_ALLOW_NONE) && (!xmlSysIDExists(URL))) { + /* + * Do a local lookup + */ + if ((ctxt->catalogs != NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_DOCUMENT))) { + resource = xmlCatalogLocalResolve(ctxt->catalogs, + (const xmlChar *)ID, + (const xmlChar *)URL); + } + /* + * Try a global lookup + */ + if ((resource == NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_GLOBAL))) { + resource = xmlCatalogResolve((const xmlChar *)ID, + (const xmlChar *)URL); + } + if ((resource == NULL) && (URL != NULL)) + resource = xmlStrdup((const xmlChar *) URL); + + /* + * TODO: do an URI lookup on the reference + */ + if ((resource != NULL) && (!xmlSysIDExists((const char *)resource))) { + xmlChar *tmp = NULL; + + if ((ctxt->catalogs != NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_DOCUMENT))) { + tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource); + } + if ((tmp == NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_GLOBAL))) { + tmp = xmlCatalogResolveURI(resource); + } + + if (tmp != NULL) { + xmlFree(resource); + resource = tmp; + } + } + } +#endif + + if (resource == NULL) + resource = (xmlChar *) URL; + + if (resource == NULL) { + if (ID == NULL) + ID = "NULL"; + if ((ctxt->validate) && (ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "failed to load external entity \"%s\"\n", 
ID); + else if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "failed to load external entity \"%s\"\n", ID); + return(NULL); + } + ret = xmlNewInputFromFile(ctxt, (const char *)resource); + if (ret == NULL) { + if ((ctxt->validate) && (ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "failed to load external entity \"%s\"\n", resource); + else if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "failed to load external entity \"%s\"\n", resource); + } + if ((resource != NULL) && (resource != (xmlChar *) URL)) + xmlFree(resource); + return(ret); +} + +static xmlExternalEntityLoader xmlCurrentExternalEntityLoader = + xmlDefaultExternalEntityLoader; + +/** + * xmlSetExternalEntityLoader: + * @f: the new entity resolver function + * + * Changes the defaultexternal entity resolver function for the application + */ +void +xmlSetExternalEntityLoader(xmlExternalEntityLoader f) { + xmlCurrentExternalEntityLoader = f; +} + +/** + * xmlGetExternalEntityLoader: + * + * Get the default external entity resolver function for the application + * + * Returns the xmlExternalEntityLoader function pointer + */ +xmlExternalEntityLoader +xmlGetExternalEntityLoader(void) { + return(xmlCurrentExternalEntityLoader); +} + +/** + * xmlLoadExternalEntity: + * @URL: the URL for the entity to load + * @ID: the Public ID for the entity to load + * @ctxt: the context in which the entity is called or NULL + * + * Load an external entity, note that the use of this function for + * unparsed entities may generate problems + * TODO: a more generic External entity API must be designed + * + * Returns the xmlParserInputPtr or NULL + */ +xmlParserInputPtr +xmlLoadExternalEntity(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + return(xmlCurrentExternalEntityLoader(URL, ID, ctxt)); +} + +/************************************************************************ + * * + * Disabling Network access 
* + * * + ************************************************************************/ + +#ifdef LIBXML_CATALOG_ENABLED +static int +xmlNoNetExists(const char *URL) +{ +#ifdef HAVE_STAT + int ret; + struct stat info; + const char *path; + + if (URL == NULL) + return (0); + + if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17)) +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &URL[17]; +#else + path = &URL[16]; +#endif + else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) { +#if defined (_WIN32) && !defined(__CYGWIN__) + path = &URL[8]; +#else + path = &URL[7]; +#endif + } else + path = URL; + ret = stat(path, &info); + if (ret == 0) + return (1); +#endif + return (0); +} +#endif + +/** + * xmlNoNetExternalEntityLoader: + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @ctxt: the context in which the entity is called or NULL + * + * A specific entity loader disabling network accesses, though still + * allowing local catalog accesses for resolution. + * + * Returns a new allocated xmlParserInputPtr, or NULL. 
+ */ +xmlParserInputPtr +xmlNoNetExternalEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + xmlParserInputPtr input = NULL; + xmlChar *resource = NULL; + +#ifdef LIBXML_CATALOG_ENABLED + xmlCatalogAllow pref; + + /* + * If the resource doesn't exists as a file, + * try to load it from the resource pointed in the catalogs + */ + pref = xmlCatalogGetDefaults(); + + if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) { + /* + * Do a local lookup + */ + if ((ctxt->catalogs != NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_DOCUMENT))) { + resource = xmlCatalogLocalResolve(ctxt->catalogs, + (const xmlChar *)ID, + (const xmlChar *)URL); + } + /* + * Try a global lookup + */ + if ((resource == NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_GLOBAL))) { + resource = xmlCatalogResolve((const xmlChar *)ID, + (const xmlChar *)URL); + } + if ((resource == NULL) && (URL != NULL)) + resource = xmlStrdup((const xmlChar *) URL); + + /* + * TODO: do an URI lookup on the reference + */ + if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) { + xmlChar *tmp = NULL; + + if ((ctxt->catalogs != NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_DOCUMENT))) { + tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource); + } + if ((tmp == NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_GLOBAL))) { + tmp = xmlCatalogResolveURI(resource); + } + + if (tmp != NULL) { + xmlFree(resource); + resource = tmp; + } + } + } +#endif + if (resource == NULL) + resource = (xmlChar *) URL; + + if (resource != NULL) { + if ((!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "ftp://", 6)) || + (!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "http://", 7))) { + xmlGenericError(xmlGenericErrorContext, + "Attempt to load network entity %s \n", resource); + + if (resource != (xmlChar *) URL) + xmlFree(resource); + return(NULL); + } + } + input = xmlDefaultExternalEntityLoader((const char 
*) resource, ID, ctxt); + if (resource != (xmlChar *) URL) + xmlFree(resource); + return(input); +} + diff --git a/bundle/libxml/xmlmemory.c b/bundle/libxml/xmlmemory.c new file mode 100644 index 0000000000..2add3a321d --- /dev/null +++ b/bundle/libxml/xmlmemory.c @@ -0,0 +1,777 @@ +/* + * xmlmemory.c: libxml memory allocator wrapper. + * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifdef HAVE_TIME_H +#include <time.h> +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#else +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif +#endif + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif + +/** + * MEM_LIST: + * + * keep track of all allocated blocks for error reporting + * Always build the memory list ! + */ +#if DEBUG && !defined(MEMLIST) +#define MEM_LIST /* keep a list of all the allocated memory blocks */ +#else +#undef MEM_LIST +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/globals.h> +#include <libxml/xmlerror.h> + +void xmlMallocBreakpoint(void); +void * xmlMemMalloc(size_t size); +void * xmlMemRealloc(void *ptr,size_t size); +void xmlMemFree(void *ptr); +char * xmlMemoryStrdup(const char *str); + +/************************************************************************ + * * + * Macros, variables and associated types * + * * + ************************************************************************/ + + +#ifdef xmlMalloc +#undef xmlMalloc +#endif +#ifdef xmlRealloc +#undef xmlRealloc +#endif +#ifdef xmlMemStrdup +#undef xmlMemStrdup +#endif + + +/* + * Each of the blocks allocated begin with a header containing informations + */ + +#define MEMTAG 0x5aa5 + +#define MALLOC_TYPE 1 +#define REALLOC_TYPE 2 +#define STRDUP_TYPE 3 + +typedef struct memnod { + unsigned int mh_tag; + unsigned int mh_type; + unsigned long mh_number; + size_t mh_size; +#ifdef MEM_LIST + struct memnod *mh_next; + struct memnod *mh_prev; +#endif + const char 
*mh_file; + unsigned int mh_line; +} MEMHDR; + + +#ifdef SUN4 +#define ALIGN_SIZE 16 +#else +#define ALIGN_SIZE sizeof(double) +#endif +#define HDR_SIZE sizeof(MEMHDR) +#define RESERVE_SIZE (((HDR_SIZE + (ALIGN_SIZE-1)) \ + / ALIGN_SIZE ) * ALIGN_SIZE) + + +#define CLIENT_2_HDR(a) ((MEMHDR *) (((char *) (a)) - RESERVE_SIZE)) +#define HDR_2_CLIENT(a) ((void *) (((char *) (a)) + RESERVE_SIZE)) + + +static unsigned long debugMemSize = 0; +static unsigned long debugMaxMemSize = 0; +static int block=0; +static int xmlMemStopAtBlock = 0; +static void *xmlMemTraceBlockAt = NULL; +static int xmlMemInitialized = 0; +#ifdef MEM_LIST +static MEMHDR *memlist = NULL; +#endif + +static void debugmem_tag_error(void *addr); +#ifdef MEM_LIST +static void debugmem_list_add(MEMHDR *); +static void debugmem_list_delete(MEMHDR *); +#endif +#define Mem_Tag_Err(a) debugmem_tag_error(a); + +#ifndef TEST_POINT +#define TEST_POINT +#endif + +/** + * xmlMallocBreakpoint: + * + * Breakpoint to use in conjunction with xmlMemStopAtBlock. When the block + * number reaches the specified value this function is called. One need to add a breakpoint + * to it to get the context in which the given block is allocated. + */ + +void +xmlMallocBreakpoint(void) { + xmlGenericError(xmlGenericErrorContext, + "xmlMallocBreakpoint reached on block %d\n", xmlMemStopAtBlock); +} + +/** + * xmlMallocLoc: + * @size: an int specifying the size in byte to allocate. + * @file: the file name or NULL + * @line: the line number + * + * a malloc() equivalent, with logging of the allocation info. + * + * Returns a pointer to the allocated area or NULL in case of lack of memory. 
+ */ + +void * +xmlMallocLoc(size_t size, const char * file, int line) +{ + MEMHDR *p; + void *ret; + + if (!xmlMemInitialized) xmlInitMemory(); +#ifdef DEBUG_MEMORY + xmlGenericError(xmlGenericErrorContext, + "Malloc(%d)\n",size); +#endif + + TEST_POINT + + p = (MEMHDR *) malloc(RESERVE_SIZE+size); + + if (!p) { + xmlGenericError(xmlGenericErrorContext, + "xmlMallocLoc : Out of free space\n"); + xmlMemoryDump(); + return(NULL); + } + p->mh_tag = MEMTAG; + p->mh_number = ++block; + p->mh_size = size; + p->mh_type = MALLOC_TYPE; + p->mh_file = file; + p->mh_line = line; + debugMemSize += size; + if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize; +#ifdef MEM_LIST + debugmem_list_add(p); +#endif + +#ifdef DEBUG_MEMORY + xmlGenericError(xmlGenericErrorContext, + "Malloc(%d) Ok\n",size); +#endif + + if (xmlMemStopAtBlock == block) xmlMallocBreakpoint(); + + ret = HDR_2_CLIENT(p); + + if (xmlMemTraceBlockAt == ret) { + xmlGenericError(xmlGenericErrorContext, + "%p : Malloc(%d) Ok\n", xmlMemTraceBlockAt, size); + xmlMallocBreakpoint(); + } + + TEST_POINT + + return(ret); +} + +/** + * xmlMemMalloc: + * @size: an int specifying the size in byte to allocate. + * + * a malloc() equivalent, with logging of the allocation info. + * + * Returns a pointer to the allocated area or NULL in case of lack of memory. + */ + +void * +xmlMemMalloc(size_t size) +{ + return(xmlMallocLoc(size, "none", 0)); +} + +/** + * xmlReallocLoc: + * @ptr: the initial memory block pointer + * @size: an int specifying the size in byte to allocate. + * @file: the file name or NULL + * @line: the line number + * + * a realloc() equivalent, with logging of the allocation info. + * + * Returns a pointer to the allocated area or NULL in case of lack of memory. 
+ */ + +void * +xmlReallocLoc(void *ptr,size_t size, const char * file, int line) +{ + MEMHDR *p; + unsigned long number; + + if (!xmlMemInitialized) xmlInitMemory(); + TEST_POINT + + p = CLIENT_2_HDR(ptr); + number = p->mh_number; + if (p->mh_tag != MEMTAG) { + Mem_Tag_Err(p); + goto error; + } + p->mh_tag = ~MEMTAG; + debugMemSize -= p->mh_size; +#ifdef MEM_LIST + debugmem_list_delete(p); +#endif + + p = (MEMHDR *) realloc(p,RESERVE_SIZE+size); + if (!p) { + goto error; + } + if (xmlMemTraceBlockAt == ptr) { + xmlGenericError(xmlGenericErrorContext, + "%p : Realloced(%d -> %d) Ok\n", + xmlMemTraceBlockAt, p->mh_size, size); + xmlMallocBreakpoint(); + } + p->mh_tag = MEMTAG; + p->mh_number = number; + p->mh_type = REALLOC_TYPE; + p->mh_size = size; + p->mh_file = file; + p->mh_line = line; + debugMemSize += size; + if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize; +#ifdef MEM_LIST + debugmem_list_add(p); +#endif + + TEST_POINT + + return(HDR_2_CLIENT(p)); + +error: + return(NULL); +} + +/** + * xmlMemRealloc: + * @ptr: the initial memory block pointer + * @size: an int specifying the size in byte to allocate. + * + * a realloc() equivalent, with logging of the allocation info. + * + * Returns a pointer to the allocated area or NULL in case of lack of memory. + */ + +void * +xmlMemRealloc(void *ptr,size_t size) { + return(xmlReallocLoc(ptr, size, "none", 0)); +} + +/** + * xmlMemFree: + * @ptr: the memory block pointer + * + * a free() equivalent, with error checking. 
+ */ +void +xmlMemFree(void *ptr) +{ + MEMHDR *p; + char *target; + + if (ptr == (void *) -1) { + xmlGenericError(xmlGenericErrorContext, + "trying to free pointer from freed area\n"); + goto error; + } + + if (xmlMemTraceBlockAt == ptr) { + xmlGenericError(xmlGenericErrorContext, + "%p : Freed()\n", xmlMemTraceBlockAt); + xmlMallocBreakpoint(); + } + + TEST_POINT + + target = (char *) ptr; + + p = CLIENT_2_HDR(ptr); + if (p->mh_tag != MEMTAG) { + Mem_Tag_Err(p); + goto error; + } + p->mh_tag = ~MEMTAG; + debugMemSize -= p->mh_size; + memset(target, -1, p->mh_size); + +#ifdef MEM_LIST + debugmem_list_delete(p); +#endif + free(p); + + TEST_POINT + + return; + +error: + xmlGenericError(xmlGenericErrorContext, + "xmlMemFree(%lX) error\n", (unsigned long) ptr); + xmlMallocBreakpoint(); + return; +} + +/** + * xmlMemStrdupLoc: + * @str: the initial string pointer + * @file: the file name or NULL + * @line: the line number + * + * a strdup() equivalent, with logging of the allocation info. + * + * Returns a pointer to the new string or NULL if allocation error occurred. 
+ */ + +char * +xmlMemStrdupLoc(const char *str, const char *file, int line) +{ + char *s; + size_t size = strlen(str) + 1; + MEMHDR *p; + + if (!xmlMemInitialized) xmlInitMemory(); + TEST_POINT + + p = (MEMHDR *) malloc(RESERVE_SIZE+size); + if (!p) { + goto error; + } + p->mh_tag = MEMTAG; + p->mh_number = ++block; + p->mh_size = size; + p->mh_type = STRDUP_TYPE; + p->mh_file = file; + p->mh_line = line; + debugMemSize += size; + if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize; +#ifdef MEM_LIST + debugmem_list_add(p); +#endif + s = (char *) HDR_2_CLIENT(p); + + if (xmlMemStopAtBlock == block) xmlMallocBreakpoint(); + + if (s != NULL) + strcpy(s,str); + else + goto error; + + TEST_POINT + + if (xmlMemTraceBlockAt == s) { + xmlGenericError(xmlGenericErrorContext, + "%p : Strdup() Ok\n", xmlMemTraceBlockAt); + xmlMallocBreakpoint(); + } + + return(s); + +error: + return(NULL); +} + +/** + * xmlMemoryStrdup: + * @str: the initial string pointer + * + * a strdup() equivalent, with logging of the allocation info. + * + * Returns a pointer to the new string or NULL if allocation error occurred. + */ + +char * +xmlMemoryStrdup(const char *str) { + return(xmlMemStrdupLoc(str, "none", 0)); +} + +/** + * xmlMemUsed: + * + * Provides the amount of memory currently allocated + * + * Returns an int representing the amount of memory allocated. 
+ */ + +int +xmlMemUsed(void) { + return(debugMemSize); +} + +#ifdef MEM_LIST +/** + * xmlMemContentShow: + * @fp: a FILE descriptor used as the output file + * @p: a memory block header + * + * tries to show some content from the memory block + */ + +static void +xmlMemContentShow(FILE *fp, MEMHDR *p) +{ + int i,j,len = p->mh_size; + const char *buf = (const char *) HDR_2_CLIENT(p); + + if (p == NULL) { + fprintf(fp, " NULL"); + return; + } + + for (i = 0;i < len;i++) { + if (buf[i] == 0) break; + if (!isprint((unsigned char) buf[i])) break; + } + if ((i < 4) && ((buf[i] != 0) || (i == 0))) { + if (len >= 4) { + MEMHDR *q; + void *cur; + + for (j = 0;j < len -3;j += 4) { + cur = *((void **) &buf[j]); + q = CLIENT_2_HDR(cur); + p = memlist; + while (p != NULL) { + if (p == q) break; + p = p->mh_next; + } + if ((p != NULL) && (p == q)) { + fprintf(fp, " pointer to #%lu at index %d", + p->mh_number, j); + return; + } + } + } + } else if ((i == 0) && (buf[i] == 0)) { + fprintf(fp," null"); + } else { + if (buf[i] == 0) fprintf(fp," \"%.25s\"", buf); + else { + fprintf(fp," ["); + for (j = 0;j < i;j++) + fprintf(fp,"%c", buf[j]); + fprintf(fp,"]"); + } + } +} +#endif + +/** + * xmlMemShow: + * @fp: a FILE descriptor used as the output file + * @nr: number of entries to dump + * + * show a show display of the memory allocated, and dump + * the @nr last allocated areas which were not freed + */ + +void +xmlMemShow(FILE *fp, int nr) +{ +#ifdef MEM_LIST + MEMHDR *p; +#endif + + if (fp != NULL) + fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n", + debugMemSize, debugMaxMemSize); +#ifdef MEM_LIST + if (nr > 0) { + fprintf(fp,"NUMBER SIZE TYPE WHERE\n"); + p = memlist; + while ((p) && nr > 0) { + fprintf(fp,"%6lu %6lu ",p->mh_number,(unsigned long)p->mh_size); + switch (p->mh_type) { + case STRDUP_TYPE:fprintf(fp,"strdup() in ");break; + case MALLOC_TYPE:fprintf(fp,"malloc() in ");break; + case REALLOC_TYPE:fprintf(fp,"realloc() in ");break; + default:fprintf(fp," ??? 
in ");break; + } + if (p->mh_file != NULL) + fprintf(fp,"%s(%d)", p->mh_file, p->mh_line); + if (p->mh_tag != MEMTAG) + fprintf(fp," INVALID"); + xmlMemContentShow(fp, p); + fprintf(fp,"\n"); + nr--; + p = p->mh_next; + } + } +#endif /* MEM_LIST */ +} + +/** + * xmlMemDisplay: + * @fp: a FILE descriptor used as the output file, if NULL, the result is + * written to the file .memorylist + * + * show in-extenso the memory blocks allocated + */ + +void +xmlMemDisplay(FILE *fp) +{ +#ifdef MEM_LIST + MEMHDR *p; + unsigned idx; + int nb = 0; +#if defined(HAVE_LOCALTIME) && defined(HAVE_STRFTIME) + time_t currentTime; + char buf[500]; + struct tm * tstruct; + + currentTime = time(NULL); + tstruct = localtime(¤tTime); + strftime(buf, sizeof(buf) - 1, "%I:%M:%S %p", tstruct); + fprintf(fp," %s\n\n", buf); +#endif + + + fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n", + debugMemSize, debugMaxMemSize); + fprintf(fp,"BLOCK NUMBER SIZE TYPE\n"); + idx = 0; + p = memlist; + while (p) { + fprintf(fp,"%-5u %6lu %6lu ",idx++,p->mh_number, + (unsigned long)p->mh_size); + switch (p->mh_type) { + case STRDUP_TYPE:fprintf(fp,"strdup() in ");break; + case MALLOC_TYPE:fprintf(fp,"malloc() in ");break; + case REALLOC_TYPE:fprintf(fp,"realloc() in ");break; + default:fprintf(fp," ??? 
in ");break; + } + if (p->mh_file != NULL) fprintf(fp,"%s(%d)", p->mh_file, p->mh_line); + if (p->mh_tag != MEMTAG) + fprintf(fp," INVALID"); + nb++; + if (nb < 100) + xmlMemContentShow(fp, p); + else + fprintf(fp," skip"); + + fprintf(fp,"\n"); + p = p->mh_next; + } +#else + fprintf(fp,"Memory list not compiled (MEM_LIST not defined !)\n"); +#endif +} + +#ifdef MEM_LIST + +static void debugmem_list_add(MEMHDR *p) +{ + p->mh_next = memlist; + p->mh_prev = NULL; + if (memlist) memlist->mh_prev = p; + memlist = p; +#ifdef MEM_LIST_DEBUG + if (stderr) + Mem_Display(stderr); +#endif +} + +static void debugmem_list_delete(MEMHDR *p) +{ + if (p->mh_next) + p->mh_next->mh_prev = p->mh_prev; + if (p->mh_prev) + p->mh_prev->mh_next = p->mh_next; + else memlist = p->mh_next; +#ifdef MEM_LIST_DEBUG + if (stderr) + Mem_Display(stderr); +#endif +} + +#endif + +/* + * debugmem_tag_error: + * + * internal error function. + */ + +static void debugmem_tag_error(void *p) +{ + xmlGenericError(xmlGenericErrorContext, + "Memory tag error occurs :%p \n\t bye\n", p); +#ifdef MEM_LIST + if (stderr) + xmlMemDisplay(stderr); +#endif +} + +static FILE *xmlMemoryDumpFile = NULL; + + +/** + * xmlMemoryDump: + * + * Dump in-extenso the memory blocks allocated to the file .memorylist + */ + +void +xmlMemoryDump(void) +{ + FILE *dump; + + if (debugMaxMemSize == 0) + return; + dump = fopen(".memdump", "w"); + if (dump == NULL) + xmlMemoryDumpFile = stderr; + else xmlMemoryDumpFile = dump; + + xmlMemDisplay(xmlMemoryDumpFile); + + if (dump != NULL) fclose(dump); +} + + +/**************************************************************** + * * + * Initialization Routines * + * * + ****************************************************************/ + +static int xmlInitMemoryDone = 0; + +/** + * xmlInitMemory: + * + * Initialize the memory layer. 
+ * + * Returns 0 on success + */ +int +xmlInitMemory(void) +{ + int ret; + +#ifdef HAVE_STDLIB_H + char *breakpoint; +#endif + + if (xmlInitMemoryDone) return(-1); + +#ifdef HAVE_STDLIB_H + breakpoint = getenv("XML_MEM_BREAKPOINT"); + if (breakpoint != NULL) { + sscanf(breakpoint, "%d", &xmlMemStopAtBlock); + } +#endif +#ifdef HAVE_STDLIB_H + breakpoint = getenv("XML_MEM_TRACE"); + if (breakpoint != NULL) { + sscanf(breakpoint, "%p", &xmlMemTraceBlockAt); + } +#endif + +#ifdef DEBUG_MEMORY + xmlGenericError(xmlGenericErrorContext, + "xmlInitMemory() Ok\n"); +#endif + xmlMemInitialized = 1; + xmlInitMemoryDone = 1; + + ret = 0; + return(ret); +} + +/** + * xmlMemSetup: + * @freeFunc: the free() function to use + * @mallocFunc: the malloc() function to use + * @reallocFunc: the realloc() function to use + * @strdupFunc: the strdup() function to use + * + * Override the default memory access functions with a new set + * This has to be called before any other libxml routines ! + * + * Should this be blocked if there was already some allocations + * done ? 
+ * + * Returns 0 on success + */ +int +xmlMemSetup(xmlFreeFunc freeFunc, xmlMallocFunc mallocFunc, + xmlReallocFunc reallocFunc, xmlStrdupFunc strdupFunc) { + if (freeFunc == NULL) + return(-1); + if (mallocFunc == NULL) + return(-1); + if (reallocFunc == NULL) + return(-1); + if (strdupFunc == NULL) + return(-1); + xmlFree = freeFunc; + xmlMalloc = mallocFunc; + xmlRealloc = reallocFunc; + xmlMemStrdup = strdupFunc; + return(0); +} + +/** + * xmlMemGet: + * @freeFunc: place to save the free() function in use + * @mallocFunc: place to save the malloc() function in use + * @reallocFunc: place to save the realloc() function in use + * @strdupFunc: place to save the strdup() function in use + * + * Provides the memory access functions set currently in use + * + * Returns 0 on success + */ +int +xmlMemGet(xmlFreeFunc *freeFunc, xmlMallocFunc *mallocFunc, + xmlReallocFunc *reallocFunc, xmlStrdupFunc *strdupFunc) { + if (freeFunc != NULL) *freeFunc = xmlFree; + if (mallocFunc != NULL) *mallocFunc = xmlMalloc; + if (reallocFunc != NULL) *reallocFunc = xmlRealloc; + if (strdupFunc != NULL) *strdupFunc = xmlMemStrdup; + return(0); +} + diff --git a/bundle/libxml/xmlreader.c b/bundle/libxml/xmlreader.c new file mode 100644 index 0000000000..65ca8e8ae1 --- /dev/null +++ b/bundle/libxml/xmlreader.c @@ -0,0 +1,882 @@ +/* + * xmlreader.c: implements the xmlTextReader streaming node API + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include <string.h> /* for memset() only ! 
*/ + +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/xmlIO.h> +#include <libxml/xmlreader.h> + +/* #define DEBUG_CALLBACKS */ +/* #define DEBUG_READER */ + +/** + * TODO: + * + * macro to flag unimplemented blocks + */ +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#ifdef DEBUG_READER +#define DUMP_READER xmlTextReaderDebug(reader); +#else +#define DUMP_READER +#endif + +/************************************************************************ + * * + * The parser: maps the Text Reader API on top of the existing * + * parsing routines building a tree * + * * + ************************************************************************/ + +#define XML_TEXTREADER_INPUT 1 +#define XML_TEXTREADER_CTXT 2 + +typedef enum { + XML_TEXTREADER_MODE_NORMAL = 0, + XML_TEXTREADER_MODE_EOF = 1 +} xmlTextReaderMode; + +typedef enum { + XML_TEXTREADER_NONE = -1, + XML_TEXTREADER_START= 0, + XML_TEXTREADER_ELEMENT= 1, + XML_TEXTREADER_END= 2, + XML_TEXTREADER_EMPTY= 3, + XML_TEXTREADER_BACKTRACK= 4 +} xmlTextReaderState; + +struct _xmlTextReader { + int mode; /* the parsing mode */ + int allocs; /* what structure were deallocated */ + xmlTextReaderState state; + xmlParserCtxtPtr ctxt; /* the parser context */ + xmlSAXHandlerPtr sax; /* the parser SAX callbacks */ + xmlParserInputBufferPtr input; /* the input */ + startElementSAXFunc startElement;/* initial SAX callbacks */ + endElementSAXFunc endElement; /* idem */ + unsigned int base; /* base of the segment in the input */ + unsigned int cur; /* current position in the input */ + xmlNodePtr node; /* current node */ + int depth; /* depth of the current node */ +}; + +#ifdef DEBUG_READER +static void +xmlTextReaderDebug(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->ctxt == NULL)) { + fprintf(stderr, "xmlTextReader NULL\n"); + return; + } + 
fprintf(stderr, "xmlTextReader: state %d depth %d ", + reader->state, reader->depth); + if (reader->node == NULL) { + fprintf(stderr, "node = NULL\n"); + } else { + fprintf(stderr, "node %s\n", reader->node->name); + } + fprintf(stderr, " input: base %d, cur %d, depth %d: ", + reader->base, reader->cur, reader->ctxt->nodeNr); + if (reader->input->buffer == NULL) { + fprintf(stderr, "buffer is NULL\n"); + } else { +#ifdef LIBXML_DEBUG_ENABLED + xmlDebugDumpString(stderr, + &reader->input->buffer->content[reader->cur]); +#endif + fprintf(stderr, "\n"); + } +} +#endif + +/** + * xmlTextReaderStartElement: + * @ctx: the user data (XML parser context) + * @fullname: The element name, including namespace prefix + * @atts: An array of name/value attributes pairs, NULL terminated + * + * called when an opening tag has been processed. + */ +static void +xmlTextReaderStartElement(void *ctx, const xmlChar *fullname, + const xmlChar **atts) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlTextReaderPtr reader = ctxt->_private; + +#ifdef DEBUG_CALLBACKS + printf("xmlTextReaderStartElement(%s)\n", fullname); +#endif + if ((reader != NULL) && (reader->startElement != NULL)) + reader->startElement(ctx, fullname, atts); + reader->state = XML_TEXTREADER_ELEMENT; +} + +/** + * xmlTextReaderEndElement: + * @ctx: the user data (XML parser context) + * @fullname: The element name, including namespace prefix + * + * called when an ending tag has been processed. 
+ */ +static void +xmlTextReaderEndElement(void *ctx, const xmlChar *fullname) { + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlTextReaderPtr reader = ctxt->_private; + +#ifdef DEBUG_CALLBACKS + printf("xmlTextReaderEndElement(%s)\n", fullname); +#endif + if ((reader != NULL) && (reader->endElement != NULL)) + reader->endElement(ctx, fullname); + if (reader->state == XML_TEXTREADER_ELEMENT) + reader->state = XML_TEXTREADER_EMPTY; + else + reader->state = XML_TEXTREADER_END; +} + +/** + * xmlTextReaderPushData: + * @reader: the xmlTextReaderPtr used + * + * Push data down the progressive parser until a significant callback + * got raised. + * + * Returns -1 in case of failure, 0 otherwise + */ +static int +xmlTextReaderPushData(xmlTextReaderPtr reader) { + unsigned int cur = reader->cur; + xmlBufferPtr inbuf; + int val; + + if ((reader->input == NULL) || (reader->input->buffer == NULL)) + return(-1); + + reader->state = XML_TEXTREADER_NONE; + inbuf = reader->input->buffer; + while (reader->state == XML_TEXTREADER_NONE) { + if (cur >= inbuf->use) { + /* + * Refill the buffer unless we are at the end of the stream + */ + if (reader->mode != XML_TEXTREADER_MODE_EOF) { + val = xmlParserInputBufferRead(reader->input, 4096); + if (val <= 0) { + reader->mode = XML_TEXTREADER_MODE_EOF; + return(val); + } + } else + break; + } + if ((inbuf->content[cur] == '>') || (inbuf->content[cur] == '&')) { + cur = cur + 1; + val = xmlParseChunk(reader->ctxt, + (const char *) &inbuf->content[reader->cur], + cur - reader->cur, 0); + if (val != 0) + return(-1); + reader->cur = cur; + break; + } else { + cur = cur + 1; + + /* + * One may have to force a flush at some point when parsing really + * large CDATA sections + */ + if ((cur - reader->cur > 4096) && (reader->base == 0) && + (reader->mode == XML_TEXTREADER_MODE_NORMAL)) { + cur = cur + 1; + val = xmlParseChunk(reader->ctxt, + (const char *) &inbuf->content[reader->cur], + cur - reader->cur, 0); + if (val != 0) + return(-1); 
+ reader->cur = cur; + } + } + } + /* + * Discard the consumed input when needed and possible + */ + if (reader->mode == XML_TEXTREADER_MODE_NORMAL) { + if ((reader->cur >= 4096) && (reader->base == 0)) { + val = xmlBufferShrink(inbuf, cur); + if (val >= 0) { + reader->cur -= val; + } + } + } + + /* + * At the end of the stream signal that the work is done to the Push + * parser. + */ + if ((reader->mode == XML_TEXTREADER_MODE_EOF) && (cur >= inbuf->use)) { + val = xmlParseChunk(reader->ctxt, + (const char *) &inbuf->content[reader->cur], 0, 1); + } + return(0); +} + +/** + * xmlTextReaderRead: + * @reader: the xmlTextReaderPtr used + * + * Moves the position of the current instance to the next node in + * the stream, exposing its properties. + * + * Returns 1 if the node was read successfully, 0 if there is no more + * nodes to read, or -1 in case of error + */ +int +xmlTextReaderRead(xmlTextReaderPtr reader) { + int val, olddepth; + xmlTextReaderState oldstate; + xmlNodePtr oldnode; + + if ((reader == NULL) || (reader->ctxt == NULL)) + return(-1); + if (reader->ctxt->wellFormed != 1) + return(-1); + +#ifdef DEBUG_READER + fprintf(stderr, "\nREAD "); + DUMP_READER +#endif + if (reader->node == NULL) { + /* + * Initial state + */ + do { + val = xmlTextReaderPushData(reader); + if (val < 0) + return(-1); + } while ((reader->ctxt->node == NULL) && + (reader->mode != XML_TEXTREADER_MODE_EOF)); + if (reader->ctxt->node == NULL) { + if (reader->ctxt->myDoc != NULL) + reader->node = reader->ctxt->myDoc->children; + if (reader->node == NULL) + return(-1); + } else { + reader->node = reader->ctxt->node; + } + reader->depth = 1; + return(1); + } + oldstate = reader->state; + olddepth = reader->ctxt->nodeNr; + oldnode = reader->node; + /* + * If we are not backtracking on ancestors or examined nodes, + * that the parser didn't finished or that we arent at the end + * of stream, continue processing. 
+ */ + if (oldstate != XML_TEXTREADER_BACKTRACK) { + while (((reader->node->children == NULL) || + (reader->node->type == XML_ENTITY_REF_NODE) || + (reader->node->type == XML_DTD_NODE)) && + (reader->node->next == NULL) && + (reader->ctxt->nodeNr == olddepth) && + (reader->ctxt->instate != XML_PARSER_EOF)) { + val = xmlTextReaderPushData(reader); + if (val < 0) + return(-1); + if (reader->node == NULL) + return(0); + } + if ((reader->node->children != NULL) && + (reader->node->type != XML_ENTITY_REF_NODE) && + (reader->node->type != XML_DTD_NODE)) { + reader->node = reader->node->children; + reader->depth++; + if ((reader->state != XML_TEXTREADER_ELEMENT) && + (reader->state != XML_TEXTREADER_EMPTY)) + reader->state = XML_TEXTREADER_ELEMENT; + DUMP_READER + return(1); + } + } + if (reader->node->next != NULL) { + if ((oldstate == XML_TEXTREADER_ELEMENT) && + (reader->node->type == XML_ELEMENT_NODE)) { + reader->state = XML_TEXTREADER_END; + DUMP_READER + return(1); + } + reader->node = reader->node->next; + reader->state = XML_TEXTREADER_ELEMENT; + DUMP_READER + /* + * Cleanup of the old node + */ + if (oldnode->type != XML_DTD_NODE) { + xmlUnlinkNode(oldnode); + xmlFreeNode(oldnode); + } + + return(1); + } + reader->node = reader->node->parent; + if ((reader->node == NULL) || + (reader->node->type == XML_DOCUMENT_NODE) || +#ifdef LIBXML_DOCB_ENABLED + (reader->node->type == XML_DOCB_DOCUMENT_NODE) || +#endif + (reader->node->type == XML_HTML_DOCUMENT_NODE)) { + reader->node = NULL; + reader->depth = 0; + + /* + * Cleanup of the old node + */ + if (oldnode->type != XML_DTD_NODE) { + xmlUnlinkNode(oldnode); + xmlFreeNode(oldnode); + } + + return(0); + } + reader->depth--; + reader->state = XML_TEXTREADER_BACKTRACK; + DUMP_READER + return(1); +} + +/************************************************************************ + * * + * Constructor and destructors * + * * + ************************************************************************/ +/** + * xmlNewTextReader: 
+ * @input: the xmlParserInputBufferPtr used to read data + * + * Create an xmlTextReader structure fed with @input + * + * Returns the new xmlTextReaderPtr or NULL in case of error + */ +xmlTextReaderPtr +xmlNewTextReader(xmlParserInputBufferPtr input) { + xmlTextReaderPtr ret; + int val; + + if (input == NULL) + return(NULL); + ret = xmlMalloc(sizeof(xmlTextReader)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewTextReader : malloc failed\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlTextReader)); + ret->input = input; + ret->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); + if (ret->sax == NULL) { + xmlFree(ret); + xmlGenericError(xmlGenericErrorContext, + "xmlNewTextReader : malloc failed\n"); + return(NULL); + } + memcpy(ret->sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler)); + ret->startElement = ret->sax->startElement; + ret->sax->startElement = xmlTextReaderStartElement; + ret->endElement = ret->sax->endElement; + ret->sax->endElement = xmlTextReaderEndElement; + + ret->mode = XML_TEXTREADER_MODE_NORMAL; + ret->node = NULL; + val = xmlParserInputBufferRead(input, 4); + if (val >= 4) { + ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, + (const char *) ret->input->buffer->content, 4, NULL); + ret->base = 0; + ret->cur = 4; + } else { + ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, NULL, 0, NULL); + ret->base = 0; + ret->cur = 0; + } + ret->ctxt->_private = ret; + ret->allocs = XML_TEXTREADER_CTXT; + return(ret); + +} + +/** + * xmlNewTextReaderFilename: + * @URI: the URI of the resource to process + * + * Create an xmlTextReader structure fed with the resource at @URI + * + * Returns the new xmlTextReaderPtr or NULL in case of error + */ +xmlTextReaderPtr +xmlNewTextReaderFilename(const char *URI) { + xmlParserInputBufferPtr input; + xmlTextReaderPtr ret; + + input = xmlParserInputBufferCreateFilename(URI, XML_CHAR_ENCODING_NONE); + if (input == NULL) + return(NULL); + ret = xmlNewTextReader(input); + if 
(ret == NULL) { + xmlFreeParserInputBuffer(input); + return(NULL); + } + ret->allocs |= XML_TEXTREADER_INPUT; + return(ret); +} + +/** + * xmlFreeTextReader: + * @reader: the xmlTextReaderPtr + * + * Deallocate all the resources associated to the reader + */ +void +xmlFreeTextReader(xmlTextReaderPtr reader) { + if (reader == NULL) + return; + if (reader->ctxt != NULL) { + if (reader->ctxt->myDoc != NULL) { + xmlFreeDoc(reader->ctxt->myDoc); + reader->ctxt->myDoc = NULL; + } + if (reader->allocs & XML_TEXTREADER_CTXT) + xmlFreeParserCtxt(reader->ctxt); + } + if (reader->sax != NULL) + xmlFree(reader->sax); + if ((reader->input != NULL) && (reader->allocs & XML_TEXTREADER_INPUT)) + xmlFreeParserInputBuffer(reader->input); + xmlFree(reader); +} + +/************************************************************************ + * * + * Acces API to the current node * + * * + ************************************************************************/ +/** + * xmlTextReaderAttributeCount: + * @reader: the xmlTextReaderPtr used + * + * Provides the number of attributes of the current node + * + * Returns 0 i no attributes, -1 in case of error or the attribute count + */ +int +xmlTextReaderAttributeCount(xmlTextReaderPtr reader) { + int ret; + xmlAttrPtr attr; + + if (reader == NULL) + return(-1); + if (reader->node == NULL) + return(0); + if (reader->node->type != XML_ELEMENT_NODE) + return(0); + if ((reader->state == XML_TEXTREADER_END) || + (reader->state == XML_TEXTREADER_BACKTRACK)) + return(0); + ret = 0; + attr = reader->node->properties; + while (attr != NULL) { + ret++; + attr = attr->next; + } + return(ret); +} + +/** + * xmlTextReaderNodeType: + * @reader: the xmlTextReaderPtr used + * + * Get the node type of the current node + * Reference: + * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html + * + * Returns the xmlNodeType of the current node or -1 in case of error + */ +int +xmlTextReaderNodeType(xmlTextReaderPtr reader) { + if (reader == NULL) + 
return(-1); + if (reader->node == NULL) + return(0); + switch (reader->node->type) { + case XML_ELEMENT_NODE: + if ((reader->state == XML_TEXTREADER_END) || + (reader->state == XML_TEXTREADER_BACKTRACK)) + return(15); + return(1); + case XML_ATTRIBUTE_NODE: + return(2); + case XML_TEXT_NODE: + return(3); /* TODO: SignificantWhitespace == 14 Whitespace == 13 */ + case XML_CDATA_SECTION_NODE: + return(4); + case XML_ENTITY_REF_NODE: + return(5); + case XML_ENTITY_NODE: + return(6); + case XML_PI_NODE: + return(7); + case XML_COMMENT_NODE: + return(8); + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return(9); + case XML_DOCUMENT_FRAG_NODE: + return(11); + case XML_NOTATION_NODE: + return(12); + case XML_DOCUMENT_TYPE_NODE: + case XML_DTD_NODE: + return(10); + + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(0); + } + return(-1); +} + +/** + * xmlTextReaderIsEmptyElement: + * @reader: the xmlTextReaderPtr used + * + * Check if the current node is empty + * + * Returns 1 if empty, 0 if not and -1 in case of error + */ +int +xmlTextReaderIsEmptyElement(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->node == NULL)) + return(-1); + if (reader->node->children != NULL) + return(0); + if ((reader->state == XML_TEXTREADER_EMPTY) || + (reader->state == XML_TEXTREADER_BACKTRACK)) + return(1); + return(0); +} + +/** + * xmlTextReaderLocalName: + * @reader: the xmlTextReaderPtr used + * + * The local name of the node. 
+ * + * Returns the local name or NULL if not available + */ +xmlChar * +xmlTextReaderLocalName(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->node == NULL)) + return(NULL); + /* only elements and attributes have a local part, others use the generic name */ + if ((reader->node->type != XML_ELEMENT_NODE) && + (reader->node->type != XML_ATTRIBUTE_NODE)) + return(xmlTextReaderName(reader)); + return(xmlStrdup(reader->node->name)); +} + +/** + * xmlTextReaderName: + * @reader: the xmlTextReaderPtr used + * + * The qualified name of the node, equal to Prefix :LocalName. + * Text, CDATA, comment, document and fragment nodes get the fixed + * "#text", "#cdata-section", "#comment", "#document" and + * "#document-fragment" names; declaration and XInclude nodes have none. + * + * Returns the qualified name or NULL if not available, the string is + * a copy made with xmlStrdup() + */ +xmlChar * +xmlTextReaderName(xmlTextReaderPtr reader) { + xmlChar *ret; + + if ((reader == NULL) || (reader->node == NULL)) + return(NULL); + switch (reader->node->type) { + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + if ((reader->node->ns == NULL) || + (reader->node->ns->prefix == NULL)) + return(xmlStrdup(reader->node->name)); + + ret = xmlStrdup(reader->node->ns->prefix); + ret = xmlStrcat(ret, BAD_CAST ":"); + ret = xmlStrcat(ret, reader->node->name); + return(ret); + case XML_TEXT_NODE: + return(xmlStrdup(BAD_CAST "#text")); + case XML_CDATA_SECTION_NODE: + return(xmlStrdup(BAD_CAST "#cdata-section")); + case XML_ENTITY_NODE: + case XML_ENTITY_REF_NODE: + return(xmlStrdup(reader->node->name)); + case XML_PI_NODE: + return(xmlStrdup(reader->node->name)); + case XML_COMMENT_NODE: + return(xmlStrdup(BAD_CAST "#comment")); + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return(xmlStrdup(BAD_CAST "#document")); + case XML_DOCUMENT_FRAG_NODE: + return(xmlStrdup(BAD_CAST "#document-fragment")); + case XML_NOTATION_NODE: + return(xmlStrdup(reader->node->name)); + case XML_DOCUMENT_TYPE_NODE: + case XML_DTD_NODE: + return(xmlStrdup(reader->node->name)); + + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(NULL); + } +
return(NULL); +} + +/** + * xmlTextReaderPrefix: + * @reader: the xmlTextReaderPtr used + * + * A shorthand reference to the namespace associated with the node. + * Only element and attribute nodes bound to a namespace with a prefix + * have one. + * + * Returns the prefix or NULL if not available + */ +xmlChar * +xmlTextReaderPrefix(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->node == NULL)) + return(NULL); + if ((reader->node->type != XML_ELEMENT_NODE) && + (reader->node->type != XML_ATTRIBUTE_NODE)) + return(NULL); + /* both tests must hold: ns has to be checked before ns->prefix is read */ + if ((reader->node->ns != NULL) && (reader->node->ns->prefix != NULL)) + return(xmlStrdup(reader->node->ns->prefix)); + return(NULL); +} + +/** + * xmlTextReaderNamespaceUri: + * @reader: the xmlTextReaderPtr used + * + * The URI defining the namespace associated with the node. + * + * Returns the namespace URI or NULL if not available + */ +xmlChar * +xmlTextReaderNamespaceUri(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->node == NULL)) + return(NULL); + if ((reader->node->type != XML_ELEMENT_NODE) && + (reader->node->type != XML_ATTRIBUTE_NODE)) + return(NULL); + if (reader->node->ns != NULL) + return(xmlStrdup(reader->node->ns->href)); + return(NULL); +} + +/** + * xmlTextReaderBaseUri: + * @reader: the xmlTextReaderPtr used + * + * The base URI of the node. + * + * Returns the base URI or NULL if not available + */ +xmlChar * +xmlTextReaderBaseUri(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->node == NULL)) + return(NULL); + return(xmlNodeGetBase(NULL, reader->node)); +} + +/** + * xmlTextReaderDepth: + * @reader: the xmlTextReaderPtr used + * + * The depth of the node in the tree. + * + * Returns the depth or -1 in case of error, 0 when there is no current node + */ +int +xmlTextReaderDepth(xmlTextReaderPtr reader) { + if (reader == NULL) + return(-1); + if (reader->node == NULL) + return(0); + + return(reader->depth); +} + +/** + * xmlTextReaderHasAttributes: + * @reader: the xmlTextReaderPtr used + * + * Whether the node has attributes.
+ * + * Returns 1 if true, 0 if false, and -1 in case or error + */ +int +xmlTextReaderHasAttributes(xmlTextReaderPtr reader) { + if (reader == NULL) + return(-1); + if (reader->node == NULL) + return(0); + + if ((reader->node->type == XML_ELEMENT_NODE) && + (reader->node->properties != NULL)) + return(1); + /* TODO: handle the xmlDecl */ + return(0); +} + +/** + * xmlTextReaderHasValue: + * @reader: the xmlTextReaderPtr used + * + * Whether the node can have a text value. This is the case for + * attribute, text, CDATA section, processing instruction and comment + * nodes only. + * + * Returns 1 if true, 0 if false (including when there is no current + * node), and -1 in case of error + */ +int +xmlTextReaderHasValue(xmlTextReaderPtr reader) { + if (reader == NULL) + return(-1); + if (reader->node == NULL) + return(0); + + switch (reader->node->type) { + case XML_ATTRIBUTE_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + return(1); + default: + return(0); + } + return(0); +} + +/** + * xmlTextReaderValue: + * @reader: the xmlTextReaderPtr used + * + * Provides the text value of the node if present. For an attribute the + * value is built with xmlNodeListGetString() from its children, for + * text, CDATA, PI and comment nodes it is a copy of the node content. + * + * Returns the string or NULL if not available. The result must be deallocated + * with xmlFree() + */ +xmlChar * +xmlTextReaderValue(xmlTextReaderPtr reader) { + if (reader == NULL) + return(NULL); + if (reader->node == NULL) + return(NULL); + + switch (reader->node->type) { + case XML_ATTRIBUTE_NODE:{ + xmlAttrPtr attr = (xmlAttrPtr) reader->node; + + if (attr->parent != NULL) + return (xmlNodeListGetString + (attr->parent->doc, attr->children, 1)); + else + return (xmlNodeListGetString(NULL, attr->children, 1)); + break; + } + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + if (reader->node->content != NULL) + return (xmlStrdup(reader->node->content)); /* a NULL content falls through to the default case */ + default: + return(NULL); + } + return(NULL); +} + +/** + * xmlTextReaderIsDefault: + * @reader: the xmlTextReaderPtr used + * + * Whether an Attribute node was generated from the default value + * defined in the DTD or schema.
+ * + * Returns 0 if not defaulted, 1 if defaulted, and -1 in case of error + */ +int +xmlTextReaderIsDefault(xmlTextReaderPtr reader) { + if (reader == NULL) + return(-1); + return(0); /* no detection is done here: every non-NULL reader reports 0 */ +} + +/** + * xmlTextReaderQuoteChar: + * @reader: the xmlTextReaderPtr used + * + * The quotation mark character used to enclose the value of an attribute. + * The current code does not inspect the attribute and always answers + * with the double quote. + * + * Returns " or ' and -1 in case of error + */ +int +xmlTextReaderQuoteChar(xmlTextReaderPtr reader) { + if (reader == NULL) + return(-1); + /* TODO maybe lookup the attribute value for " first */ + return((int) '"'); +} + +/** + * xmlTextReaderXmlLang: + * @reader: the xmlTextReaderPtr used + * + * The xml:lang scope within which the node resides, as computed by + * xmlNodeGetLang() on the current node. + * + * Returns the xml:lang value or NULL if none exists. + */ +xmlChar * +xmlTextReaderXmlLang(xmlTextReaderPtr reader) { + if (reader == NULL) + return(NULL); + if (reader->node == NULL) + return(NULL); + return(xmlNodeGetLang(reader->node)); +} + diff --git a/bundle/libxml/xmlregexp.c b/bundle/libxml/xmlregexp.c new file mode 100644 index 0000000000..129c1269ec --- /dev/null +++ b/bundle/libxml/xmlregexp.c @@ -0,0 +1,4319 @@ +/* + * regexp.c: generic and extensible Regular Expression engine + * + * Basically designed with the purpose of compiling regexps for + * the variety of validation/shemas mechanisms now available in + * XML related specifications thise includes: + * - XML-1.0 DTD validation + * - XML Schemas structure part 1 + * - XML Schemas Datatypes part 2 especially Appendix F + * - RELAX-NG/TREX i.e. the counter proposal + * + * See Copyright for the status of this software.
+ * + * Daniel Veillard <veillard@redhat.com> + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_REGEXP_ENABLED + +#include <stdio.h> +#include <string.h> +#include <libxml/tree.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlregexp.h> +#include <libxml/xmlautomata.h> +#include <libxml/xmlunicode.h> + +/* #define DEBUG_REGEXP_GRAPH */ +/* #define DEBUG_REGEXP_EXEC */ +/* #define DEBUG_PUSH */ +/* #define DEBUG_COMPACTION */ +/* The parsing macros below expect a variable named ctxt in scope; ERROR expands to two statements */ +#define ERROR(str) ctxt->error = 1; \ + xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur) +#define NEXT ctxt->cur++ +#define CUR (*(ctxt->cur)) +#define NXT(index) (ctxt->cur[index]) + +#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) +#define NEXTL(l) ctxt->cur += l; + +/** + * TODO: + * + * macro to flag unimplemented blocks, it reports the file and line + * through xmlGenericError() + */ +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + + +/************************************************************************ + * * + * Datatypes and structures * + * * + ************************************************************************/ + +typedef enum { + XML_REGEXP_EPSILON = 1, + XML_REGEXP_CHARVAL, + XML_REGEXP_RANGES, + XML_REGEXP_SUBREG, + XML_REGEXP_STRING, + XML_REGEXP_ANYCHAR, /* .
*/ + XML_REGEXP_ANYSPACE, /* \s */ + XML_REGEXP_NOTSPACE, /* \S */ + XML_REGEXP_INITNAME, /* \l */ + XML_REGEXP_NOTINITNAME, /* \L */ + XML_REGEXP_NAMECHAR, /* \c */ + XML_REGEXP_NOTNAMECHAR, /* \C */ + XML_REGEXP_DECIMAL, /* \d */ + XML_REGEXP_NOTDECIMAL, /* \D */ + XML_REGEXP_REALCHAR, /* \w */ + XML_REGEXP_NOTREALCHAR, /* \W */ + XML_REGEXP_LETTER, + XML_REGEXP_LETTER_UPPERCASE, + XML_REGEXP_LETTER_LOWERCASE, + XML_REGEXP_LETTER_TITLECASE, + XML_REGEXP_LETTER_MODIFIER, + XML_REGEXP_LETTER_OTHERS, + XML_REGEXP_MARK, + XML_REGEXP_MARK_NONSPACING, + XML_REGEXP_MARK_SPACECOMBINING, + XML_REGEXP_MARK_ENCLOSING, + XML_REGEXP_NUMBER, + XML_REGEXP_NUMBER_DECIMAL, + XML_REGEXP_NUMBER_LETTER, + XML_REGEXP_NUMBER_OTHERS, + XML_REGEXP_PUNCT, + XML_REGEXP_PUNCT_CONNECTOR, + XML_REGEXP_PUNCT_DASH, + XML_REGEXP_PUNCT_OPEN, + XML_REGEXP_PUNCT_CLOSE, + XML_REGEXP_PUNCT_INITQUOTE, + XML_REGEXP_PUNCT_FINQUOTE, + XML_REGEXP_PUNCT_OTHERS, + XML_REGEXP_SEPAR, + XML_REGEXP_SEPAR_SPACE, + XML_REGEXP_SEPAR_LINE, + XML_REGEXP_SEPAR_PARA, + XML_REGEXP_SYMBOL, + XML_REGEXP_SYMBOL_MATH, + XML_REGEXP_SYMBOL_CURRENCY, + XML_REGEXP_SYMBOL_MODIFIER, + XML_REGEXP_SYMBOL_OTHERS, + XML_REGEXP_OTHER, + XML_REGEXP_OTHER_CONTROL, + XML_REGEXP_OTHER_FORMAT, + XML_REGEXP_OTHER_PRIVATE, + XML_REGEXP_OTHER_NA, + XML_REGEXP_BLOCK_NAME +} xmlRegAtomType; + +typedef enum { + XML_REGEXP_QUANT_EPSILON = 1, + XML_REGEXP_QUANT_ONCE, + XML_REGEXP_QUANT_OPT, + XML_REGEXP_QUANT_MULT, + XML_REGEXP_QUANT_PLUS, + XML_REGEXP_QUANT_ONCEONLY, + XML_REGEXP_QUANT_ALL, + XML_REGEXP_QUANT_RANGE +} xmlRegQuantType; + +typedef enum { + XML_REGEXP_START_STATE = 1, + XML_REGEXP_FINAL_STATE, + XML_REGEXP_TRANS_STATE +} xmlRegStateType; + +typedef enum { + XML_REGEXP_MARK_NORMAL = 0, + XML_REGEXP_MARK_START, + XML_REGEXP_MARK_VISITED +} xmlRegMarkedType; + +typedef struct _xmlRegRange xmlRegRange; +typedef xmlRegRange *xmlRegRangePtr; + +struct _xmlRegRange { + int neg; /* non-zero for a negative range */ + xmlRegAtomType type; + int start; /* the start codepoint */ + int end; /* the end codepoint */ + xmlChar
*blockName; /* released together with the range by xmlRegFreeRange() */ +}; + +typedef struct _xmlRegAtom xmlRegAtom; +typedef xmlRegAtom *xmlRegAtomPtr; + +typedef struct _xmlAutomataState xmlRegState; +typedef xmlRegState *xmlRegStatePtr; + +struct _xmlRegAtom { + int no; /* index of the atom in the parser context atoms array */ + xmlRegAtomType type; + xmlRegQuantType quant; + int min; + int max; + + void *valuep; /* the string value for XML_REGEXP_STRING atoms */ + void *valuep2; + int neg; + int codepoint; + xmlRegStatePtr start; + xmlRegStatePtr stop; + int maxRanges; /* allocated size of ranges */ + int nbRanges; /* number of entries used in ranges */ + xmlRegRangePtr *ranges; + void *data; +}; + +typedef struct _xmlRegCounter xmlRegCounter; +typedef xmlRegCounter *xmlRegCounterPtr; + +struct _xmlRegCounter { + int min; + int max; +}; + +typedef struct _xmlRegTrans xmlRegTrans; +typedef xmlRegTrans *xmlRegTransPtr; + +struct _xmlRegTrans { + xmlRegAtomPtr atom; /* NULL for an epsilon transition */ + int to; /* number of the target state, negative once removed */ + int counter; + int count; +}; + +struct _xmlAutomataState { + xmlRegStateType type; + xmlRegMarkedType mark; + xmlRegMarkedType reached; + int no; /* index of the state in the parser context states array */ + + int maxTrans; /* allocated size of trans */ + int nbTrans; /* number of entries used in trans */ + xmlRegTrans *trans; +}; + +typedef struct _xmlAutomata xmlRegParserCtxt; +typedef xmlRegParserCtxt *xmlRegParserCtxtPtr; + +struct _xmlAutomata { + xmlChar *string; /* private copy of the expression being parsed */ + xmlChar *cur; /* current position in string */ + + int error; /* set to 1 by the ERROR macro */ + int neg; + + xmlRegStatePtr start; + xmlRegStatePtr end; + xmlRegStatePtr state; + + xmlRegAtomPtr atom; + + int maxAtoms; + int nbAtoms; + xmlRegAtomPtr *atoms; + + int maxStates; + int nbStates; + xmlRegStatePtr *states; + + int maxCounters; + int nbCounters; + xmlRegCounter *counters; + + int determinist; /* initialised to -1 by xmlRegNewParserCtxt() */ +}; + +struct _xmlRegexp { + xmlChar *string; + int nbStates; + xmlRegStatePtr *states; + int nbAtoms; + xmlRegAtomPtr *atoms; + int nbCounters; + xmlRegCounter *counters; + int determinist; + /* + * That's the compact form for determinists automatas: + * compact holds nbstates rows of (nbstrings + 1) ints, the state type + * first and then one transition target per string, stored as + * target + 1 so that 0 means no transition + */ + int nbstates; + int *compact; + void **transdata; + int nbstrings; + xmlChar **stringMap; +}; + +typedef struct _xmlRegExecRollback xmlRegExecRollback; +typedef xmlRegExecRollback *xmlRegExecRollbackPtr; + +struct _xmlRegExecRollback { + xmlRegStatePtr state;/* the current state */ + int index;
/* the index in the input stack */ + int nextbranch; /* the next transition to explore in that state */ + int *counts; /* save the automate state if it has some */ +}; + +typedef struct _xmlRegInputToken xmlRegInputToken; +typedef xmlRegInputToken *xmlRegInputTokenPtr; + +struct _xmlRegInputToken { + xmlChar *value; + void *data; +}; + +struct _xmlRegExecCtxt { + int status; /* execution status != 0 indicate an error */ + int determinist; /* did we found an inderterministic behaviour */ + xmlRegexpPtr comp; /* the compiled regexp */ + xmlRegExecCallbacks callback; + void *data; + + xmlRegStatePtr state;/* the current state */ + int transno; /* the current transition on that state */ + int transcount; /* the number of char in char counted transitions */ + + /* + * A stack of rollback states + */ + int maxRollbacks; + int nbRollbacks; + xmlRegExecRollback *rollbacks; + + /* + * The state of the automata if any + */ + int *counts; + + /* + * The input stack + */ + int inputStackMax; + int inputStackNr; + int index; + int *charStack; + const xmlChar *inputString; /* when operating on characters */ + xmlRegInputTokenPtr inputStack;/* when operating on strings */ + +}; + +#define REGEXP_ALL_COUNTER 0x123456 +#define REGEXP_ALL_LAX_COUNTER 0x123457 + +static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); +static void xmlRegFreeState(xmlRegStatePtr state); +static void xmlRegFreeAtom(xmlRegAtomPtr atom); + +/************************************************************************ + * * + * Allocation/Deallocation * + * * + ************************************************************************/ + +static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); +/** + * xmlRegEpxFromParse: + * @ctxt: the parser context used to build it + * + * Allocate a new regexp and fill it with the reult from the parser + * + * Returns the new regexp or NULL in case of error + */ +static xmlRegexpPtr +xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { + xmlRegexpPtr ret; + + ret 
= (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlRegexp)); + ret->string = ctxt->string; + ctxt->string = NULL; + ret->nbStates = ctxt->nbStates; + ctxt->nbStates = 0; + ret->states = ctxt->states; + ctxt->states = NULL; + ret->nbAtoms = ctxt->nbAtoms; + ctxt->nbAtoms = 0; + ret->atoms = ctxt->atoms; + ctxt->atoms = NULL; + ret->nbCounters = ctxt->nbCounters; + ctxt->nbCounters = 0; + ret->counters = ctxt->counters; + ctxt->counters = NULL; + ret->determinist = ctxt->determinist; + + if ((ret->determinist != 0) && + (ret->nbCounters == 0) && + (ret->atoms != NULL) && + (ret->atoms[0] != NULL) && + (ret->atoms[0]->type == XML_REGEXP_STRING)) { + int i, j, nbstates = 0, nbatoms = 0; + int *stateRemap; + int *stringRemap; + int *transitions; + void **transdata; + xmlChar **stringMap; + xmlChar *value; + + /* + * Switch to a compact representation + * 1/ counting the effective number of states left + * 2/ conting the unique number of atoms, and check that + * they are all of the string type + * 3/ build a table state x atom for the transitions + */ + + stateRemap = xmlMalloc(ret->nbStates * sizeof(int)); + for (i = 0;i < ret->nbStates;i++) { + if (ret->states[i] != NULL) { + stateRemap[i] = nbstates; + nbstates++; + } else { + stateRemap[i] = -1; + } + } +#ifdef DEBUG_COMPACTION + printf("Final: %d states\n", nbstates); +#endif + stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *)); + stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int)); + for (i = 0;i < ret->nbAtoms;i++) { + if ((ret->atoms[i]->type == XML_REGEXP_STRING) && + (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { + value = ret->atoms[i]->valuep; + for (j = 0;j < nbatoms;j++) { + if (xmlStrEqual(stringMap[j], value)) { + stringRemap[i] = j; + break; + } + } + if (j >= nbatoms) { + stringRemap[i] = nbatoms; + stringMap[nbatoms] = xmlStrdup(value); + nbatoms++; + } + } else { + xmlFree(stateRemap); + xmlFree(stringRemap); + for (i = 0;i < 
nbatoms;i++) + xmlFree(stringMap[i]); + xmlFree(stringMap); + goto fail_compact; + } + } +#ifdef DEBUG_COMPACTION + printf("Final: %d atoms\n", nbatoms); +#endif + + /* + * Allocate the transition table. The first entry for each + * state correspond to the state type. + */ + transitions = (int *) xmlMalloc(nbstates * (nbatoms + 1) * sizeof(int)); + transdata = NULL; + memset(transitions, 0, nbstates * (nbatoms + 1) * sizeof(int)); + + for (i = 0;i < ret->nbStates;i++) { + int stateno, atomno, targetno, prev; + xmlRegStatePtr state; + xmlRegTransPtr trans; + + stateno = stateRemap[i]; + if (stateno == -1) + continue; + state = ret->states[i]; + + transitions[stateno * (nbatoms + 1)] = state->type; + + for (j = 0;j < state->nbTrans;j++) { + trans = &(state->trans[j]); + if ((trans->to == -1) || (trans->atom == NULL)) + continue; + atomno = stringRemap[trans->atom->no]; + if ((trans->atom->data != NULL) && (transdata == NULL)) { + transdata = (void **) xmlMalloc(nbstates * nbatoms * + sizeof(void *)); + if (transdata != NULL) + memset(transdata, 0, + nbstates * nbatoms * sizeof(void *)); + } + targetno = stateRemap[trans->to]; + /* + * if the same atome can generate transition to 2 different + * states then it means the automata is not determinist and + * the compact form can't be used ! 
+ */ + prev = transitions[stateno * (nbatoms + 1) + atomno + 1]; + if (prev != 0) { + if (prev != targetno + 1) { + printf("not determinist\n"); + ret->determinist = 0; +#ifdef DEBUG_COMPACTION + printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n", + i, j, trans->atom->no, trans->to, atomno, targetno); + printf(" previous to is %d\n", prev); +#endif + ret->determinist = 0; + if (transdata != NULL) + xmlFree(transdata); + xmlFree(transitions); + xmlFree(stateRemap); + xmlFree(stringRemap); + for (i = 0;i < nbatoms;i++) + xmlFree(stringMap[i]); + xmlFree(stringMap); + goto fail_compact; + } + } else { +#if 0 + printf("State %d trans %d: atom %d to %d : %d to %d\n", + i, j, trans->atom->no, trans->to, atomno, targetno); +#endif + transitions[stateno * (nbatoms + 1) + atomno + 1] = + targetno + 1; /* to avoid 0 */ + if (transdata != NULL) + transdata[stateno * nbatoms + atomno] = + trans->atom->data; + } + } + } + ret->determinist = 1; +#ifdef DEBUG_COMPACTION + /* + * Debug + */ + for (i = 0;i < nbstates;i++) { + for (j = 0;j < nbatoms + 1;j++) { + printf("%02d ", transitions[i * (nbatoms + 1) + j]); + } + printf("\n"); + } + printf("\n"); +#endif + /* + * Cleanup of the old data + */ + if (ret->states != NULL) { + for (i = 0;i < ret->nbStates;i++) + xmlRegFreeState(ret->states[i]); + xmlFree(ret->states); + } + ret->states = NULL; + ret->nbStates = 0; + if (ret->atoms != NULL) { + for (i = 0;i < ret->nbAtoms;i++) + xmlRegFreeAtom(ret->atoms[i]); + xmlFree(ret->atoms); + } + ret->atoms = NULL; + ret->nbAtoms = 0; + + ret->compact = transitions; + ret->transdata = transdata; + ret->stringMap = stringMap; + ret->nbstrings = nbatoms; + ret->nbstates = nbstates; + xmlFree(stateRemap); + xmlFree(stringRemap); + } +fail_compact: + return(ret); +} + +/** + * xmlRegNewParserCtxt: + * @string: the string to parse + * + * Allocate a new regexp parser context + * + * Returns the new context or NULL in case of error + */ +static xmlRegParserCtxtPtr 
+xmlRegNewParserCtxt(const xmlChar *string) { + xmlRegParserCtxtPtr ret; + + ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlRegParserCtxt)); + if (string != NULL) + ret->string = xmlStrdup(string); + ret->cur = ret->string; + ret->neg = 0; + ret->error = 0; + ret->determinist = -1; + return(ret); +} + +/** + * xmlRegNewRange: + * @ctxt: the regexp parser context + * @neg: is that negative + * @type: the type of range + * @start: the start codepoint + * @end: the end codepoint + * + * Allocate a new regexp range + * + * Returns the new range or NULL in case of error + */ +static xmlRegRangePtr +xmlRegNewRange(xmlRegParserCtxtPtr ctxt, + int neg, xmlRegAtomType type, int start, int end) { + xmlRegRangePtr ret; + + ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange)); + if (ret == NULL) { + ERROR("failed to allocate regexp range"); + return(NULL); + } + ret->neg = neg; + ret->type = type; + ret->start = start; + ret->end = end; + return(ret); +} + +/** + * xmlRegFreeRange: + * @range: the regexp range + * + * Free a regexp range + */ +static void +xmlRegFreeRange(xmlRegRangePtr range) { + if (range == NULL) + return; + + if (range->blockName != NULL) + xmlFree(range->blockName); + xmlFree(range); +} + +/** + * xmlRegNewAtom: + * @ctxt: the regexp parser context + * @type: the type of atom + * + * Allocate a new regexp range + * + * Returns the new atom or NULL in case of error + */ +static xmlRegAtomPtr +xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { + xmlRegAtomPtr ret; + + ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); + if (ret == NULL) { + ERROR("failed to allocate regexp atom"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegAtom)); + ret->type = type; + ret->quant = XML_REGEXP_QUANT_ONCE; + ret->min = 0; + ret->max = 0; + return(ret); +} + +/** + * xmlRegFreeAtom: + * @atom: the regexp atom + * + * Free a regexp atom + */ +static void 
+xmlRegFreeAtom(xmlRegAtomPtr atom) { + int i; + + if (atom == NULL) + return; + + for (i = 0;i < atom->nbRanges;i++) + xmlRegFreeRange(atom->ranges[i]); + if (atom->ranges != NULL) + xmlFree(atom->ranges); + if (atom->type == XML_REGEXP_STRING) + xmlFree(atom->valuep); /* valuep is only released for string atoms */ + xmlFree(atom); +} +/** + * xmlRegNewState: + * @ctxt: the regexp parser context, used for error reporting + * + * Allocate a new zeroed state of type XML_REGEXP_TRANS_STATE. The state + * is not registered in @ctxt by this function. + * + * Returns the new state or NULL in case of error + */ +static xmlRegStatePtr +xmlRegNewState(xmlRegParserCtxtPtr ctxt) { + xmlRegStatePtr ret; + + ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState)); + if (ret == NULL) { + ERROR("failed to allocate regexp state"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegState)); + ret->type = XML_REGEXP_TRANS_STATE; + ret->mark = XML_REGEXP_MARK_NORMAL; + return(ret); +} + +/** + * xmlRegFreeState: + * @state: the regexp state + * + * Free a regexp state and its array of transitions + */ +static void +xmlRegFreeState(xmlRegStatePtr state) { + if (state == NULL) + return; + + if (state->trans != NULL) + xmlFree(state->trans); + xmlFree(state); +} + +/** + * xmlRegFreeParserCtxt: + * @ctxt: the regexp parser context + * + * Free a regexp parser context together with the string, states, atoms + * and counters it still holds + */ +static void +xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { + int i; + if (ctxt == NULL) + return; + + if (ctxt->string != NULL) + xmlFree(ctxt->string); + if (ctxt->states != NULL) { + for (i = 0;i < ctxt->nbStates;i++) + xmlRegFreeState(ctxt->states[i]); + xmlFree(ctxt->states); + } + if (ctxt->atoms != NULL) { + for (i = 0;i < ctxt->nbAtoms;i++) + xmlRegFreeAtom(ctxt->atoms[i]); + xmlFree(ctxt->atoms); + } + if (ctxt->counters != NULL) + xmlFree(ctxt->counters); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * Display of Data structures * + * * + ************************************************************************/ +/* Print a short label for an atom type on output, nothing for unknown values */ +static void +xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { + switch (type) { + case XML_REGEXP_EPSILON: + fprintf(output, "epsilon "); break; + case XML_REGEXP_CHARVAL: + fprintf(output, "charval "); break; + case XML_REGEXP_RANGES: + fprintf(output, "ranges "); break; +
case XML_REGEXP_SUBREG: + fprintf(output, "subexpr "); break; + case XML_REGEXP_STRING: + fprintf(output, "string "); break; + case XML_REGEXP_ANYCHAR: + fprintf(output, "anychar "); break; + case XML_REGEXP_ANYSPACE: + fprintf(output, "anyspace "); break; + case XML_REGEXP_NOTSPACE: + fprintf(output, "notspace "); break; + case XML_REGEXP_INITNAME: + fprintf(output, "initname "); break; + case XML_REGEXP_NOTINITNAME: + fprintf(output, "notinitname "); break; + case XML_REGEXP_NAMECHAR: + fprintf(output, "namechar "); break; + case XML_REGEXP_NOTNAMECHAR: + fprintf(output, "notnamechar "); break; + case XML_REGEXP_DECIMAL: + fprintf(output, "decimal "); break; + case XML_REGEXP_NOTDECIMAL: + fprintf(output, "notdecimal "); break; + case XML_REGEXP_REALCHAR: + fprintf(output, "realchar "); break; + case XML_REGEXP_NOTREALCHAR: + fprintf(output, "notrealchar "); break; + case XML_REGEXP_LETTER: + fprintf(output, "LETTER "); break; + case XML_REGEXP_LETTER_UPPERCASE: + fprintf(output, "LETTER_UPPERCASE "); break; + case XML_REGEXP_LETTER_LOWERCASE: + fprintf(output, "LETTER_LOWERCASE "); break; + case XML_REGEXP_LETTER_TITLECASE: + fprintf(output, "LETTER_TITLECASE "); break; + case XML_REGEXP_LETTER_MODIFIER: + fprintf(output, "LETTER_MODIFIER "); break; + case XML_REGEXP_LETTER_OTHERS: + fprintf(output, "LETTER_OTHERS "); break; + case XML_REGEXP_MARK: + fprintf(output, "MARK "); break; + case XML_REGEXP_MARK_NONSPACING: + fprintf(output, "MARK_NONSPACING "); break; + case XML_REGEXP_MARK_SPACECOMBINING: + fprintf(output, "MARK_SPACECOMBINING "); break; + case XML_REGEXP_MARK_ENCLOSING: + fprintf(output, "MARK_ENCLOSING "); break; + case XML_REGEXP_NUMBER: + fprintf(output, "NUMBER "); break; + case XML_REGEXP_NUMBER_DECIMAL: + fprintf(output, "NUMBER_DECIMAL "); break; + case XML_REGEXP_NUMBER_LETTER: + fprintf(output, "NUMBER_LETTER "); break; + case XML_REGEXP_NUMBER_OTHERS: + fprintf(output, "NUMBER_OTHERS "); break; + case XML_REGEXP_PUNCT: + fprintf(output,
"PUNCT "); break; + case XML_REGEXP_PUNCT_CONNECTOR: + fprintf(output, "PUNCT_CONNECTOR "); break; + case XML_REGEXP_PUNCT_DASH: + fprintf(output, "PUNCT_DASH "); break; + case XML_REGEXP_PUNCT_OPEN: + fprintf(output, "PUNCT_OPEN "); break; + case XML_REGEXP_PUNCT_CLOSE: + fprintf(output, "PUNCT_CLOSE "); break; + case XML_REGEXP_PUNCT_INITQUOTE: + fprintf(output, "PUNCT_INITQUOTE "); break; + case XML_REGEXP_PUNCT_FINQUOTE: + fprintf(output, "PUNCT_FINQUOTE "); break; + case XML_REGEXP_PUNCT_OTHERS: + fprintf(output, "PUNCT_OTHERS "); break; + case XML_REGEXP_SEPAR: + fprintf(output, "SEPAR "); break; + case XML_REGEXP_SEPAR_SPACE: + fprintf(output, "SEPAR_SPACE "); break; + case XML_REGEXP_SEPAR_LINE: + fprintf(output, "SEPAR_LINE "); break; + case XML_REGEXP_SEPAR_PARA: + fprintf(output, "SEPAR_PARA "); break; + case XML_REGEXP_SYMBOL: + fprintf(output, "SYMBOL "); break; + case XML_REGEXP_SYMBOL_MATH: + fprintf(output, "SYMBOL_MATH "); break; + case XML_REGEXP_SYMBOL_CURRENCY: + fprintf(output, "SYMBOL_CURRENCY "); break; + case XML_REGEXP_SYMBOL_MODIFIER: + fprintf(output, "SYMBOL_MODIFIER "); break; + case XML_REGEXP_SYMBOL_OTHERS: + fprintf(output, "SYMBOL_OTHERS "); break; + case XML_REGEXP_OTHER: + fprintf(output, "OTHER "); break; + case XML_REGEXP_OTHER_CONTROL: + fprintf(output, "OTHER_CONTROL "); break; + case XML_REGEXP_OTHER_FORMAT: + fprintf(output, "OTHER_FORMAT "); break; + case XML_REGEXP_OTHER_PRIVATE: + fprintf(output, "OTHER_PRIVATE "); break; + case XML_REGEXP_OTHER_NA: + fprintf(output, "OTHER_NA "); break; + case XML_REGEXP_BLOCK_NAME: + fprintf(output, "BLOCK "); break; + } +} +/* Print a short label for a quantifier type on output */ +static void +xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { + switch (type) { + case XML_REGEXP_QUANT_EPSILON: + fprintf(output, "epsilon "); break; + case XML_REGEXP_QUANT_ONCE: + fprintf(output, "once "); break; + case XML_REGEXP_QUANT_OPT: + fprintf(output, "?
"); break; + case XML_REGEXP_QUANT_MULT: + fprintf(output, "* "); break; + case XML_REGEXP_QUANT_PLUS: + fprintf(output, "+ "); break; + case XML_REGEXP_QUANT_RANGE: + fprintf(output, "range "); break; + case XML_REGEXP_QUANT_ONCEONLY: + fprintf(output, "onceonly "); break; + case XML_REGEXP_QUANT_ALL: + fprintf(output, "all "); break; + } +} +static void +xmlRegPrintRange(FILE *output, xmlRegRangePtr range) { + fprintf(output, " range: "); + if (range->neg) + fprintf(output, "negative "); + xmlRegPrintAtomType(output, range->type); + fprintf(output, "%c - %c\n", range->start, range->end); /* the bounds are printed as characters */ +} +/* Dump one atom: its type, its quantifier and the data specific to its type */ +static void +xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { + fprintf(output, " atom: "); + if (atom == NULL) { + fprintf(output, "NULL\n"); + return; + } + xmlRegPrintAtomType(output, atom->type); + xmlRegPrintQuantType(output, atom->quant); + if (atom->quant == XML_REGEXP_QUANT_RANGE) + fprintf(output, "%d-%d ", atom->min, atom->max); + if (atom->type == XML_REGEXP_STRING) + fprintf(output, "'%s' ", (char *) atom->valuep); + if (atom->type == XML_REGEXP_CHARVAL) + fprintf(output, "char %c\n", atom->codepoint); + else if (atom->type == XML_REGEXP_RANGES) { + int i; + fprintf(output, "%d entries\n", atom->nbRanges); + for (i = 0; i < atom->nbRanges;i++) + xmlRegPrintRange(output, atom->ranges[i]); + } else if (atom->type == XML_REGEXP_SUBREG) { + fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no); /* start and stop are not checked for NULL here */ + } else { + fprintf(output, "\n"); + } +} +/* Dump one transition: removed, counted, count based, epsilon or atom based */ +static void +xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { + fprintf(output, " trans: "); + if (trans == NULL) { + fprintf(output, "NULL\n"); + return; + } + if (trans->to < 0) { /* a negative target is reported as a removed transition */ + fprintf(output, "removed\n"); + return; + } + if (trans->counter >= 0) { + fprintf(output, "counted %d, ", trans->counter); + } + if (trans->count == REGEXP_ALL_COUNTER) { + fprintf(output, "all transition, "); + } else if (trans->count >= 0) { + fprintf(output, "count based %d, ", trans->count); + } + if (trans->atom ==
NULL) { + fprintf(output, "epsilon to %d\n", trans->to); + return; + } + if (trans->atom->type == XML_REGEXP_CHARVAL) + fprintf(output, "char %c ", trans->atom->codepoint); + fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); +} +/* Dump one state: START/FINAL flag, number and each of its transitions */ +static void +xmlRegPrintState(FILE *output, xmlRegStatePtr state) { + int i; + + fprintf(output, " state: "); + if (state == NULL) { + fprintf(output, "NULL\n"); + return; + } + if (state->type == XML_REGEXP_START_STATE) + fprintf(output, "START "); + if (state->type == XML_REGEXP_FINAL_STATE) + fprintf(output, "FINAL "); + + fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); + for (i = 0;i < state->nbTrans; i++) { + xmlRegPrintTrans(output, &(state->trans[i])); + } +} +/* The full context dump below is only compiled when DEBUG_REGEXP_GRAPH is defined */ +#ifdef DEBUG_REGEXP_GRAPH +static void +xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { + int i; + + fprintf(output, " ctxt: "); + if (ctxt == NULL) { + fprintf(output, "NULL\n"); + return; + } + fprintf(output, "'%s' ", ctxt->string); + if (ctxt->error) + fprintf(output, "error "); + if (ctxt->neg) + fprintf(output, "neg "); + fprintf(output, "\n"); + fprintf(output, "%d atoms:\n", ctxt->nbAtoms); + for (i = 0;i < ctxt->nbAtoms; i++) { + fprintf(output, " %02d ", i); + xmlRegPrintAtom(output, ctxt->atoms[i]); + } + if (ctxt->atom != NULL) { + fprintf(output, "current atom:\n"); + xmlRegPrintAtom(output, ctxt->atom); + } + fprintf(output, "%d states:", ctxt->nbStates); + if (ctxt->start != NULL) + fprintf(output, " start: %d", ctxt->start->no); + if (ctxt->end != NULL) + fprintf(output, " end: %d", ctxt->end->no); + fprintf(output, "\n"); + for (i = 0;i < ctxt->nbStates; i++) { + xmlRegPrintState(output, ctxt->states[i]); + } + fprintf(output, "%d counters:\n", ctxt->nbCounters); + for (i = 0;i < ctxt->nbCounters; i++) { + fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, + ctxt->counters[i].max); + } +} +#endif + +/************************************************************************ + * * + * Finite Automata
structures manipulations * + * * + ************************************************************************/ + +static void +xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, + int neg, xmlRegAtomType type, int start, int end, + xmlChar *blockName) { + xmlRegRangePtr range; + + if (atom == NULL) { + ERROR("add range: atom is NULL"); + return; + } + if (atom->type != XML_REGEXP_RANGES) { + ERROR("add range: atom is not ranges"); + return; + } + if (atom->maxRanges == 0) { + atom->maxRanges = 4; + atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges * + sizeof(xmlRegRangePtr)); + if (atom->ranges == NULL) { + ERROR("add range: allocation failed"); + atom->maxRanges = 0; + return; + } + } else if (atom->nbRanges >= atom->maxRanges) { + xmlRegRangePtr *tmp; + atom->maxRanges *= 2; + tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges * + sizeof(xmlRegRangePtr)); + if (tmp == NULL) { + ERROR("add range: allocation failed"); + atom->maxRanges /= 2; + return; + } + atom->ranges = tmp; + } + range = xmlRegNewRange(ctxt, neg, type, start, end); + if (range == NULL) + return; + range->blockName = blockName; + atom->ranges[atom->nbRanges++] = range; + +} + +static int +xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { + if (ctxt->maxCounters == 0) { + ctxt->maxCounters = 4; + ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * + sizeof(xmlRegCounter)); + if (ctxt->counters == NULL) { + ERROR("reg counter: allocation failed"); + ctxt->maxCounters = 0; + return(-1); + } + } else if (ctxt->nbCounters >= ctxt->maxCounters) { + xmlRegCounter *tmp; + ctxt->maxCounters *= 2; + tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters * + sizeof(xmlRegCounter)); + if (tmp == NULL) { + ERROR("reg counter: allocation failed"); + ctxt->maxCounters /= 2; + return(-1); + } + ctxt->counters = tmp; + } + ctxt->counters[ctxt->nbCounters].min = -1; + ctxt->counters[ctxt->nbCounters].max = -1; + return(ctxt->nbCounters++); +} + +static 
void +xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { + if (atom == NULL) { + ERROR("atom push: atom is NULL"); + return; + } + if (ctxt->maxAtoms == 0) { + ctxt->maxAtoms = 4; + ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * + sizeof(xmlRegAtomPtr)); + if (ctxt->atoms == NULL) { + ERROR("atom push: allocation failed"); + ctxt->maxAtoms = 0; + return; + } + } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { + xmlRegAtomPtr *tmp; + ctxt->maxAtoms *= 2; + tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms * + sizeof(xmlRegAtomPtr)); + if (tmp == NULL) { + ERROR("atom push: allocation failed"); + ctxt->maxAtoms /= 2; + return; + } + ctxt->atoms = tmp; + } + atom->no = ctxt->nbAtoms; + ctxt->atoms[ctxt->nbAtoms++] = atom; +} + +static void +xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + xmlRegAtomPtr atom, xmlRegStatePtr target, + int counter, int count) { + if (state == NULL) { + ERROR("add state: state is NULL"); + return; + } + if (target == NULL) { + ERROR("add state: target is NULL"); + return; + } + if (state->maxTrans == 0) { + state->maxTrans = 4; + state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * + sizeof(xmlRegTrans)); + if (state->trans == NULL) { + ERROR("add range: allocation failed"); + state->maxTrans = 0; + return; + } + } else if (state->nbTrans >= state->maxTrans) { + xmlRegTrans *tmp; + state->maxTrans *= 2; + tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans * + sizeof(xmlRegTrans)); + if (tmp == NULL) { + ERROR("add range: allocation failed"); + state->maxTrans /= 2; + return; + } + state->trans = tmp; + } +#ifdef DEBUG_REGEXP_GRAPH + printf("Add trans from %d to %d ", state->no, target->no); + if (count == REGEXP_ALL_COUNTER) + printf("all transition"); + else if (count >= 0) + printf("count based %d", count); + else if (counter >= 0) + printf("counted %d", counter); + else if (atom == NULL) + printf("epsilon transition"); + printf("\n"); +#endif + + 
state->trans[state->nbTrans].atom = atom; + state->trans[state->nbTrans].to = target->no; + state->trans[state->nbTrans].counter = counter; + state->trans[state->nbTrans].count = count; + state->nbTrans++; +} + +static void +xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { + if (ctxt->maxStates == 0) { + ctxt->maxStates = 4; + ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates * + sizeof(xmlRegStatePtr)); + if (ctxt->states == NULL) { + ERROR("add range: allocation failed"); + ctxt->maxStates = 0; + return; + } + } else if (ctxt->nbStates >= ctxt->maxStates) { + xmlRegStatePtr *tmp; + ctxt->maxStates *= 2; + tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates * + sizeof(xmlRegStatePtr)); + if (tmp == NULL) { + ERROR("add range: allocation failed"); + ctxt->maxStates /= 2; + return; + } + ctxt->states = tmp; + } + state->no = ctxt->nbStates; + ctxt->states[ctxt->nbStates++] = state; +} + +/** + * xmlFAGenerateAllTransition: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * @lax: + * + */ +static void +xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, + int lax) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + if (lax) + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER); + else + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER); +} + +/** + * xmlFAGenerateEpsilonTransition: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * + */ +static void +xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1); +} + +/** + * xmlFAGenerateCountedEpsilonTransition: + * 
@ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * counter: the counter for that transition + * + */ +static void +xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, int counter) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1); +} + +/** + * xmlFAGenerateCountedTransition: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * counter: the counter for that transition + * + */ +static void +xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, int counter) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter); +} + +/** + * xmlFAGenerateTransitions: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * @atom: the atom generating the transition + * + */ +static void +xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, + xmlRegStatePtr to, xmlRegAtomPtr atom) { + if (atom == NULL) { + ERROR("genrate transition: atom == NULL"); + return; + } + if (atom->type == XML_REGEXP_SUBREG) { + /* + * this is a subexpression handling one should not need to + * create a new node excep for XML_REGEXP_QUANT_RANGE. 
+ */ + xmlRegAtomPush(ctxt, atom); + if ((to != NULL) && (atom->stop != to) && + (atom->quant != XML_REGEXP_QUANT_RANGE)) { + /* + * Generate an epsilon transition to link to the target + */ + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); + } + switch (atom->quant) { + case XML_REGEXP_QUANT_OPT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); + break; + case XML_REGEXP_QUANT_MULT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); + break; + case XML_REGEXP_QUANT_PLUS: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); + break; + case XML_REGEXP_QUANT_RANGE: { + int counter; + xmlRegStatePtr newstate; + + /* + * This one is nasty: + * 1/ register a new counter + * 2/ register an epsilon transition associated to + * this counter going from atom->stop to atom->start + * 3/ create a new state + * 4/ generate a counted transition from atom->stop to + * that state + */ + counter = xmlRegGetCounter(ctxt); + ctxt->counters[counter].min = atom->min - 1; + ctxt->counters[counter].max = atom->max - 1; + atom->min = 0; + atom->max = 0; + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, + atom->start, counter); + if (to != NULL) { + newstate = to; + } else { + newstate = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, newstate); + ctxt->state = newstate; + } + xmlFAGenerateCountedTransition(ctxt, atom->stop, + newstate, counter); + } + default: + break; + } + return; + } else { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + } + xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); + xmlRegAtomPush(ctxt, atom); + ctxt->state = to; + } + switch (atom->quant) { + case XML_REGEXP_QUANT_OPT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, from, to); 
+ break; + case XML_REGEXP_QUANT_MULT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, from, to); + xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); + break; + case XML_REGEXP_QUANT_PLUS: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); + break; + default: + break; + } +} + +/** + * xmlFAReduceEpsilonTransitions: + * @ctxt: a regexp parser context + * @fromnr: the from state + * @tonr: the to state + * @cpunter: should that transition be associted to a counted + * + */ +static void +xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, + int tonr, int counter) { + int transnr; + xmlRegStatePtr from; + xmlRegStatePtr to; + +#ifdef DEBUG_REGEXP_GRAPH + printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); +#endif + from = ctxt->states[fromnr]; + if (from == NULL) + return; + to = ctxt->states[tonr]; + if (to == NULL) + return; + if ((to->mark == XML_REGEXP_MARK_START) || + (to->mark == XML_REGEXP_MARK_VISITED)) + return; + + to->mark = XML_REGEXP_MARK_VISITED; + if (to->type == XML_REGEXP_FINAL_STATE) { +#ifdef DEBUG_REGEXP_GRAPH + printf("State %d is final, so %d becomes final\n", tonr, fromnr); +#endif + from->type = XML_REGEXP_FINAL_STATE; + } + for (transnr = 0;transnr < to->nbTrans;transnr++) { + if (to->trans[transnr].atom == NULL) { + /* + * Don't remove counted transitions + * Don't loop either + */ + if (to->trans[transnr].to != fromnr) { + if (to->trans[transnr].count >= 0) { + int newto = to->trans[transnr].to; + + xmlRegStateAddTrans(ctxt, from, NULL, + ctxt->states[newto], + -1, to->trans[transnr].count); + } else { +#ifdef DEBUG_REGEXP_GRAPH + printf("Found epsilon trans %d from %d to %d\n", + transnr, tonr, to->trans[transnr].to); +#endif + if (to->trans[transnr].counter >= 0) { + xmlFAReduceEpsilonTransitions(ctxt, fromnr, + to->trans[transnr].to, + to->trans[transnr].counter); + } else { + xmlFAReduceEpsilonTransitions(ctxt, fromnr, + 
to->trans[transnr].to,
                                              counter);
                    }
                }
            }
        } else {
            /*
             * Transition on an atom: copy it onto the from state, keeping
             * its own counter when it has one, else the inherited one.
             */
            int newto = to->trans[transnr].to;

            if (to->trans[transnr].counter >= 0) {
                xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
                                    ctxt->states[newto],
                                    to->trans[transnr].counter, -1);
            } else {
                xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
                                    ctxt->states[newto], counter, -1);
            }
        }
    }
    /* clear the VISITED mark set on entry */
    to->mark = XML_REGEXP_MARK_NORMAL;
}

/**
 * xmlFAEliminateEpsilonTransitions:
 * @ctxt:  a regexp parser context
 *
 * Runs three passes over the states of @ctxt:
 *  1/ for every epsilon transition with no count, copy the transitions
 *     reachable through it onto the source state
 *     (xmlFAReduceEpsilonTransitions);
 *  2/ disable the epsilon transitions by setting their target to -1;
 *  3/ free the states that can no longer be reached from state 0 and
 *     replace their slot in ctxt->states with NULL.
 */
static void
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
    int statenr, transnr;
    xmlRegStatePtr state;

    /*
     * build the completed transitions bypassing the epsilons
     * Use a marking algorithm to avoid loops
     */
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
        state = ctxt->states[statenr];
        if (state == NULL)
            continue;
        for (transnr = 0;transnr < state->nbTrans;transnr++) {
            if ((state->trans[transnr].atom == NULL) &&
                (state->trans[transnr].to >= 0)) {
                if (state->trans[transnr].to == statenr) {
                    /* epsilon self-loop: simply disable it */
                    state->trans[transnr].to = -1;
#ifdef DEBUG_REGEXP_GRAPH
                    printf("Removed loopback epsilon trans %d on %d\n",
                           transnr, statenr);
#endif
                } else if (state->trans[transnr].count < 0) {
                    int newto = state->trans[transnr].to;

#ifdef DEBUG_REGEXP_GRAPH
                    printf("Found epsilon trans %d from %d to %d\n",
                           transnr, statenr, newto);
#endif
                    /* START mark stops the recursion from coming back here */
                    state->mark = XML_REGEXP_MARK_START;
                    xmlFAReduceEpsilonTransitions(ctxt, statenr,
                                      newto, state->trans[transnr].counter);
                    state->mark = XML_REGEXP_MARK_NORMAL;
#ifdef DEBUG_REGEXP_GRAPH
                } else {
                    printf("Found counted transition %d on %d\n",
                           transnr, statenr);
#endif
                }
            }
        }
    }
    /*
     * Eliminate the epsilon transitions
     */
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
        state = ctxt->states[statenr];
        if (state == NULL)
            continue;
        for (transnr = 0;transnr < state->nbTrans;transnr++) {
            if ((state->trans[transnr].atom == NULL) &&
                (state->trans[transnr].count < 0) &&
                (state->trans[transnr].to >= 0)) {
                state->trans[transnr].to = -1;
            }
        }
    }

    /*
     * Use this pass to detect unreachable states too
     * (reached: 0 = not seen, 1 = seen but not explored, 2 = explored)
     */
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
        state = ctxt->states[statenr];
        if (state != NULL)
            state->reached = 0;
    }
    /* NOTE(review): reads ctxt->states[0] without checking nbStates > 0 */
    state = ctxt->states[0];
    if (state != NULL)
        state->reached = 1;
    while (state != NULL) {
        xmlRegStatePtr target = NULL;
        state->reached = 2;
        /*
         * Mark all state reachable from the current reachable state
         */
        for (transnr = 0;transnr < state->nbTrans;transnr++) {
            if ((state->trans[transnr].to >= 0) &&
                ((state->trans[transnr].atom != NULL) ||
                 (state->trans[transnr].count >= 0))) {
                int newto = state->trans[transnr].to;

                if (ctxt->states[newto] == NULL)
                    continue;
                if (ctxt->states[newto]->reached == 0) {
                    ctxt->states[newto]->reached = 1;
                    target = ctxt->states[newto];
                }
            }
        }
        /*
         * find the next accessible state not explored
         */
        if (target == NULL) {
            for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
                state = ctxt->states[statenr];
                if ((state != NULL) && (state->reached == 1)) {
                    target = state;
                    break;
                }
            }
        }
        state = target;
    }
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
        state = ctxt->states[statenr];
        if ((state != NULL) && (state->reached == 0)) {
#ifdef DEBUG_REGEXP_GRAPH
            printf("Removed unreachable state %d\n", statenr);
#endif
            xmlRegFreeState(state);
            ctxt->states[statenr] = NULL;
        }
    }

}

/**
 * xmlFACompareAtoms:
 * @atom1:  an atom
 * @atom2:  an atom
 *
 * Compares two atoms to check whether they are equivalent
 *
 * Returns 1 if yes and 0 otherwise
 */
static int
xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
    if (atom1 == atom2)
        return(1);
    if ((atom1 == NULL) || (atom2 == NULL))
        return(0);

    if (atom1->type != atom2->type)
        return(0);
    switch (atom1->type) {
        case XML_REGEXP_STRING:
            return(xmlStrEqual((xmlChar *)atom1->valuep,
                               (xmlChar
*)atom2->valuep)); + case XML_REGEXP_EPSILON: + return(1); + case XML_REGEXP_CHARVAL: + return(atom1->codepoint == atom2->codepoint); + case XML_REGEXP_RANGES: + TODO; + return(0); + default: + break; + } + return(1); +} + +/** + * xmlFARecurseDeterminism: + * @ctxt: a regexp parser context + * + * Check whether the associated regexp is determinist, + * should be called after xmlFAEliminateEpsilonTransitions() + * + */ +static int +xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + int to, xmlRegAtomPtr atom) { + int ret = 1; + int transnr; + xmlRegTransPtr t1; + + if (state == NULL) + return(ret); + for (transnr = 0;transnr < state->nbTrans;transnr++) { + t1 = &(state->trans[transnr]); + /* + * check transitions conflicting with the one looked at + */ + if (t1->atom == NULL) { + if (t1->to == -1) + continue; + ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], + to, atom); + if (ret == 0) + return(0); + continue; + } + if (t1->to != to) + continue; + if (xmlFACompareAtoms(t1->atom, atom)) + return(0); + } + return(ret); +} + +/** + * xmlFAComputesDeterminism: + * @ctxt: a regexp parser context + * + * Check whether the associated regexp is determinist, + * should be called after xmlFAEliminateEpsilonTransitions() + * + */ +static int +xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { + int statenr, transnr; + xmlRegStatePtr state; + xmlRegTransPtr t1, t2; + int i; + int ret = 1; + +#ifdef DEBUG_REGEXP_GRAPH + printf("xmlFAComputesDeterminism\n"); + xmlRegPrintCtxt(stdout, ctxt); +#endif + if (ctxt->determinist != -1) + return(ctxt->determinist); + + /* + * Check for all states that there isn't 2 transitions + * with the same atom and a different target. 
     */
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
        state = ctxt->states[statenr];
        if (state == NULL)
            continue;
        for (transnr = 0;transnr < state->nbTrans;transnr++) {
            t1 = &(state->trans[transnr]);
            /*
             * Determinism checks in case of counted or all transitions
             * will have to be handled separately
             */
            if (t1->atom == NULL)
                continue;
            if (t1->to == -1) /* eliminated */
                continue;
            /* compare t1 with every earlier transition of the same state */
            for (i = 0;i < transnr;i++) {
                t2 = &(state->trans[i]);
                if (t2->to == -1) /* eliminated */
                    continue;
                if (t2->atom != NULL) {
                    if (t1->to == t2->to) {
                        /* same atom, same target: t2 is a duplicate */
                        if (xmlFACompareAtoms(t1->atom, t2->atom))
                            t2->to = -1; /* eliminate */
                    } else {
                        /* not determinist ! */
                        if (xmlFACompareAtoms(t1->atom, t2->atom))
                            ret = 0;
                    }
                } else if (t1->to != -1) {
                    /*
                     * do the closure in case of remaining specific
                     * epsilon transitions like choices or all
                     */
                    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
                                                   t2->to, t2->atom);
                    /* this early exit does not cache ctxt->determinist */
                    if (ret == 0)
                        return(0);
                }
            }
            if (ret == 0)
                break;
        }
        if (ret == 0)
            break;
    }
    ctxt->determinist = ret;
    return(ret);
}

/************************************************************************
 *                                                                      *
 *    Routines to check input against transition atoms                  *
 *                                                                      *
 ************************************************************************/

/*
 * xmlRegCheckCharacterRange:
 * @type:  the kind of character class to test against
 * @codepoint:  the character to test
 * @neg:  if non-zero the result is inverted
 * @start:  first accepted value, only read for XML_REGEXP_CHARVAL
 * @end:  last accepted value, only read for XML_REGEXP_CHARVAL
 * @blockName:  block name, only read for XML_REGEXP_BLOCK_NAME
 *
 * The NOTxxx types (and REALCHAR) flip @neg and then fall through to
 * the test of their counterpart.
 *
 * Returns the (possibly negated) result of the class test, or -1 for the
 * types that are not character classes (STRING, SUBREG, RANGES, EPSILON).
 */
static int
xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
                          int start, int end, const xmlChar *blockName) {
    int ret = 0;

    switch (type) {
        case XML_REGEXP_STRING:
        case XML_REGEXP_SUBREG:
        case XML_REGEXP_RANGES:
        case XML_REGEXP_EPSILON:
            return(-1);
        case XML_REGEXP_ANYCHAR:
            ret = ((codepoint != '\n') && (codepoint != '\r'));
            break;
        case XML_REGEXP_CHARVAL:
            ret = ((codepoint >= start) && (codepoint <= end));
            break;
        case XML_REGEXP_NOTSPACE:
            neg = !neg;
            /* fallthrough */
        case XML_REGEXP_ANYSPACE:
            ret = ((codepoint == '\n') || (codepoint == '\r') ||
                   (codepoint == '\t') || (codepoint == ' '));
            break;
        case XML_REGEXP_NOTINITNAME:
            neg = !neg;
            /* fallthrough */
        case XML_REGEXP_INITNAME:
            ret = (xmlIsLetter(codepoint) ||
                   (codepoint == '_') || (codepoint == ':'));
            break;
        case XML_REGEXP_NOTNAMECHAR:
            neg = !neg;
            /* fallthrough */
        case XML_REGEXP_NAMECHAR:
            ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
                   (codepoint == '.') || (codepoint == '-') ||
                   (codepoint == '_') || (codepoint == ':') ||
                   xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
            break;
        case XML_REGEXP_NOTDECIMAL:
            neg = !neg;
            /* fallthrough */
        case XML_REGEXP_DECIMAL:
            ret = xmlUCSIsCatNd(codepoint);
            break;
        case XML_REGEXP_REALCHAR:
            /* here the positive type is the one negating the test below */
            neg = !neg;
            /* fallthrough */
        case XML_REGEXP_NOTREALCHAR:
            ret = xmlUCSIsCatP(codepoint);
            if (ret == 0)
                ret = xmlUCSIsCatZ(codepoint);
            if (ret == 0)
                ret = xmlUCSIsCatC(codepoint);
            break;
        case XML_REGEXP_LETTER:
            ret = xmlUCSIsCatL(codepoint);
            break;
        case XML_REGEXP_LETTER_UPPERCASE:
            ret = xmlUCSIsCatLu(codepoint);
            break;
        case XML_REGEXP_LETTER_LOWERCASE:
            ret = xmlUCSIsCatLl(codepoint);
            break;
        case XML_REGEXP_LETTER_TITLECASE:
            ret = xmlUCSIsCatLt(codepoint);
            break;
        case XML_REGEXP_LETTER_MODIFIER:
            ret = xmlUCSIsCatLm(codepoint);
            break;
        case XML_REGEXP_LETTER_OTHERS:
            ret = xmlUCSIsCatLo(codepoint);
            break;
        case XML_REGEXP_MARK:
            ret = xmlUCSIsCatM(codepoint);
            break;
        case XML_REGEXP_MARK_NONSPACING:
            ret = xmlUCSIsCatMn(codepoint);
            break;
        case XML_REGEXP_MARK_SPACECOMBINING:
            ret = xmlUCSIsCatMc(codepoint);
            break;
        case XML_REGEXP_MARK_ENCLOSING:
            ret = xmlUCSIsCatMe(codepoint);
            break;
        case XML_REGEXP_NUMBER:
            ret = xmlUCSIsCatN(codepoint);
            break;
        case XML_REGEXP_NUMBER_DECIMAL:
            ret = xmlUCSIsCatNd(codepoint);
            break;
        case XML_REGEXP_NUMBER_LETTER:
            ret = xmlUCSIsCatNl(codepoint);
            break;
        case XML_REGEXP_NUMBER_OTHERS:
            ret = xmlUCSIsCatNo(codepoint);
            break;
        case XML_REGEXP_PUNCT:
            ret = xmlUCSIsCatP(codepoint);
            break;
        case XML_REGEXP_PUNCT_CONNECTOR:
            ret = xmlUCSIsCatPc(codepoint);
            break;
        case XML_REGEXP_PUNCT_DASH:
            ret = xmlUCSIsCatPd(codepoint);
            break;
        case XML_REGEXP_PUNCT_OPEN:
            ret = xmlUCSIsCatPs(codepoint);
            break;
        case XML_REGEXP_PUNCT_CLOSE:
            ret = xmlUCSIsCatPe(codepoint);
            break;
        case XML_REGEXP_PUNCT_INITQUOTE:
            ret = xmlUCSIsCatPi(codepoint);
            break;
        case XML_REGEXP_PUNCT_FINQUOTE:
            ret = xmlUCSIsCatPf(codepoint);
            break;
        case XML_REGEXP_PUNCT_OTHERS:
            ret = xmlUCSIsCatPo(codepoint);
            break;
        case XML_REGEXP_SEPAR:
            ret = xmlUCSIsCatZ(codepoint);
            break;
        case XML_REGEXP_SEPAR_SPACE:
            ret = xmlUCSIsCatZs(codepoint);
            break;
        case XML_REGEXP_SEPAR_LINE:
            ret = xmlUCSIsCatZl(codepoint);
            break;
        case XML_REGEXP_SEPAR_PARA:
            ret = xmlUCSIsCatZp(codepoint);
            break;
        case XML_REGEXP_SYMBOL:
            ret = xmlUCSIsCatS(codepoint);
            break;
        case XML_REGEXP_SYMBOL_MATH:
            ret = xmlUCSIsCatSm(codepoint);
            break;
        case XML_REGEXP_SYMBOL_CURRENCY:
            ret = xmlUCSIsCatSc(codepoint);
            break;
        case XML_REGEXP_SYMBOL_MODIFIER:
            ret = xmlUCSIsCatSk(codepoint);
            break;
        case XML_REGEXP_SYMBOL_OTHERS:
            ret = xmlUCSIsCatSo(codepoint);
            break;
        case XML_REGEXP_OTHER:
            ret = xmlUCSIsCatC(codepoint);
            break;
        case XML_REGEXP_OTHER_CONTROL:
            ret = xmlUCSIsCatCc(codepoint);
            break;
        case XML_REGEXP_OTHER_FORMAT:
            ret = xmlUCSIsCatCf(codepoint);
            break;
        case XML_REGEXP_OTHER_PRIVATE:
            ret = xmlUCSIsCatCo(codepoint);
            break;
        case XML_REGEXP_OTHER_NA:
            /* ret = xmlUCSIsCatCn(codepoint); */
            /* Seems it doesn't exist anymore in recent Unicode releases */
            ret = 0;
            break;
        case XML_REGEXP_BLOCK_NAME:
            ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
            break;
    }
    if (neg)
        return(!ret);
    return(ret);
}

/*
 * xmlRegCheckCharacter:
 * @atom:  the atom to test against
 * @codepoint:  the character to test
 *
 * Returns -1 when @atom is NULL, @codepoint is rejected by xmlIsChar(),
 * or the atom type cannot be matched against a single character;
 * otherwise the result of the match.
 */
static int
xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
    int i, ret = 0;
    xmlRegRangePtr range;

    if ((atom == NULL) || (!xmlIsChar(codepoint)))
        return(-1);

    switch (atom->type) {
        case XML_REGEXP_SUBREG:
        case XML_REGEXP_EPSILON:
            return(-1);
        case XML_REGEXP_CHARVAL:
            return(codepoint ==
atom->codepoint);
        case XML_REGEXP_RANGES: {
            int accept = 0;
            /*
             * A match on any negated range rejects the character at once;
             * a match on a positive range only records acceptance, since a
             * later negated range may still exclude it.
             */
            for (i = 0;i < atom->nbRanges;i++) {
                range = atom->ranges[i];
                if (range->neg) {
                    ret = xmlRegCheckCharacterRange(range->type, codepoint,
                                0, range->start, range->end,
                                range->blockName);
                    if (ret != 0)
                        return(0); /* excluded char */
                } else {
                    ret = xmlRegCheckCharacterRange(range->type, codepoint,
                                0, range->start, range->end,
                                range->blockName);
                    if (ret != 0)
                        accept = 1; /* might still be excluded */
                }
            }
            return(accept);
        }
        case XML_REGEXP_STRING:
            printf("TODO: XML_REGEXP_STRING\n");
            return(-1);
        /* every single character class is delegated to the range checker */
        case XML_REGEXP_ANYCHAR:
        case XML_REGEXP_ANYSPACE:
        case XML_REGEXP_NOTSPACE:
        case XML_REGEXP_INITNAME:
        case XML_REGEXP_NOTINITNAME:
        case XML_REGEXP_NAMECHAR:
        case XML_REGEXP_NOTNAMECHAR:
        case XML_REGEXP_DECIMAL:
        case XML_REGEXP_NOTDECIMAL:
        case XML_REGEXP_REALCHAR:
        case XML_REGEXP_NOTREALCHAR:
        case XML_REGEXP_LETTER:
        case XML_REGEXP_LETTER_UPPERCASE:
        case XML_REGEXP_LETTER_LOWERCASE:
        case XML_REGEXP_LETTER_TITLECASE:
        case XML_REGEXP_LETTER_MODIFIER:
        case XML_REGEXP_LETTER_OTHERS:
        case XML_REGEXP_MARK:
        case XML_REGEXP_MARK_NONSPACING:
        case XML_REGEXP_MARK_SPACECOMBINING:
        case XML_REGEXP_MARK_ENCLOSING:
        case XML_REGEXP_NUMBER:
        case XML_REGEXP_NUMBER_DECIMAL:
        case XML_REGEXP_NUMBER_LETTER:
        case XML_REGEXP_NUMBER_OTHERS:
        case XML_REGEXP_PUNCT:
        case XML_REGEXP_PUNCT_CONNECTOR:
        case XML_REGEXP_PUNCT_DASH:
        case XML_REGEXP_PUNCT_OPEN:
        case XML_REGEXP_PUNCT_CLOSE:
        case XML_REGEXP_PUNCT_INITQUOTE:
        case XML_REGEXP_PUNCT_FINQUOTE:
        case XML_REGEXP_PUNCT_OTHERS:
        case XML_REGEXP_SEPAR:
        case XML_REGEXP_SEPAR_SPACE:
        case XML_REGEXP_SEPAR_LINE:
        case XML_REGEXP_SEPAR_PARA:
        case XML_REGEXP_SYMBOL:
        case XML_REGEXP_SYMBOL_MATH:
        case XML_REGEXP_SYMBOL_CURRENCY:
        case XML_REGEXP_SYMBOL_MODIFIER:
        case XML_REGEXP_SYMBOL_OTHERS:
        case XML_REGEXP_OTHER:
        case XML_REGEXP_OTHER_CONTROL:
        case XML_REGEXP_OTHER_FORMAT:
        case XML_REGEXP_OTHER_PRIVATE:
        case XML_REGEXP_OTHER_NA:
        case XML_REGEXP_BLOCK_NAME:
            ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
                                            (const xmlChar *)atom->valuep);
            /* the atom's own negation is applied here, not in the helper */
            if (atom->neg)
                ret = !ret;
            break;
    }
    return(ret);
}

/************************************************************************
 *                                                                      *
 *    Saving and restoring state of an execution context                *
 *                                                                      *
 ************************************************************************/

#ifdef DEBUG_REGEXP_EXEC
/*
 * xmlFARegDebugExec:
 * @exec:  a regexp execution context
 *
 * Debug helper: prints the current state number, transition number and
 * input index, followed by the top of the input stack or the remaining
 * input string.
 */
static void
xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
    if (exec->inputStack != NULL) {
        int i;
        printf(": ");
        for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
            printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
    } else {
        printf(": %s", &(exec->inputString[exec->index]));
    }
    printf("\n");
}
#endif

/*
 * xmlFARegExecSave:
 * @exec:  a regexp execution context
 *
 * Pushes a rollback point made of the current state, the input index,
 * the next transition to try (transno + 1) and, when the regexp has
 * counters, a copy of the current counter values.
 */
static void
xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
#ifdef DEBUG_REGEXP_EXEC
    printf("saving ");
    exec->transno++;
    xmlFARegDebugExec(exec);
    exec->transno--;
#endif

    if (exec->maxRollbacks == 0) {
        exec->maxRollbacks = 4;
        exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
                                             sizeof(xmlRegExecRollback));
        if (exec->rollbacks == NULL) {
            fprintf(stderr, "exec save: allocation failed");
            exec->maxRollbacks = 0;
            return;
        }
        /* zeroed so that unused entries have a NULL counts pointer */
        memset(exec->rollbacks, 0,
               exec->maxRollbacks * sizeof(xmlRegExecRollback));
    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
        xmlRegExecRollback *tmp;
        int len = exec->maxRollbacks;

        exec->maxRollbacks *= 2;
        tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
                        exec->maxRollbacks * sizeof(xmlRegExecRollback));
        if (tmp == NULL) {
            fprintf(stderr, "exec save: allocation failed");
            exec->maxRollbacks /= 2;
            return;
        }
        exec->rollbacks = tmp;
        /* zero only the newly added half of the array */
        tmp = &exec->rollbacks[len];
        memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
    }
exec->rollbacks[exec->nbRollbacks].state = exec->state; + exec->rollbacks[exec->nbRollbacks].index = exec->index; + exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; + if (exec->comp->nbCounters > 0) { + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + exec->rollbacks[exec->nbRollbacks].counts = (int *) + xmlMalloc(exec->comp->nbCounters * sizeof(int)); + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + fprintf(stderr, "exec save: allocation failed"); + exec->status = -5; + return; + } + } + memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, + exec->comp->nbCounters * sizeof(int)); + } + exec->nbRollbacks++; +} + +static void +xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { + if (exec->nbRollbacks <= 0) { + exec->status = -1; +#ifdef DEBUG_REGEXP_EXEC + printf("rollback failed on empty stack\n"); +#endif + return; + } + exec->nbRollbacks--; + exec->state = exec->rollbacks[exec->nbRollbacks].state; + exec->index = exec->rollbacks[exec->nbRollbacks].index; + exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; + if (exec->comp->nbCounters > 0) { + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + fprintf(stderr, "exec save: allocation failed"); + exec->status = -6; + return; + } + memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, + exec->comp->nbCounters * sizeof(int)); + } + +#ifdef DEBUG_REGEXP_EXEC + printf("restored "); + xmlFARegDebugExec(exec); +#endif +} + +/************************************************************************ + * * + * Verifyer, running an input against a compiled regexp * + * * + ************************************************************************/ + +static int +xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { + xmlRegExecCtxt execval; + xmlRegExecCtxtPtr exec = &execval; + int ret, codepoint, len; + + exec->inputString = content; + exec->index = 0; + exec->determinist = 1; + exec->maxRollbacks = 0; + exec->nbRollbacks = 0; + exec->rollbacks = 
NULL;
    exec->status = 0;
    exec->comp = comp;
    exec->state = comp->states[0];
    exec->transno = 0;
    exec->transcount = 0;
    if (comp->nbCounters > 0) {
        exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
        if (exec->counts == NULL)
            return(-1);
        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
    } else
        exec->counts = NULL;
    /* loop until an error, or end of input reached on a final state */
    while ((exec->status == 0) &&
           ((exec->inputString[exec->index] != 0) ||
            (exec->state->type != XML_REGEXP_FINAL_STATE))) {
        xmlRegTransPtr trans;
        xmlRegAtomPtr atom;

        /*
         * End of input on non-terminal state, rollback, however we may
         * still have epsilon like transition for counted transitions
         * on counters, in that case don't break too early.
         */
        if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
            goto rollback;

        exec->transcount = 0;
        /* try the transitions of the current state, resuming at transno */
        for (;exec->transno < exec->state->nbTrans;exec->transno++) {
            trans = &exec->state->trans[exec->transno];
            if (trans->to < 0)
                continue;
            atom = trans->atom;
            ret = 0;
            if (trans->count >= 0) {
                int count;
                xmlRegCounterPtr counter;

                /*
                 * A counted transition.
                 */

                count = exec->counts[trans->count];
                counter = &exec->comp->counters[trans->count];
#ifdef DEBUG_REGEXP_EXEC
                printf("testing count %d: val %d, min %d, max %d\n",
                       trans->count, count, counter->min, counter->max);
#endif
                ret = ((count >= counter->min) && (count <= counter->max));
            } else if (atom == NULL) {
                fprintf(stderr, "epsilon transition left at runtime\n");
                exec->status = -2;
                break;
            } else if (exec->inputString[exec->index] != 0) {
                /* CUR_SCHAR also stores the character's byte length in len */
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
                ret = xmlRegCheckCharacter(atom, codepoint);
                if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
                    xmlRegStatePtr to = comp->states[trans->to];

                    /*
                     * this is a multiple input sequence
                     */
                    if (exec->state->nbTrans > exec->transno + 1) {
                        xmlFARegExecSave(exec);
                    }
                    exec->transcount = 1;
                    do {
                        /*
                         * Try to progress as much as possible on the input
                         */
                        if (exec->transcount == atom->max) {
                            break;
                        }
                        exec->index += len;
                        /*
                         * End of input: stop here
                         */
                        if (exec->inputString[exec->index] == 0) {
                            exec->index -= len;
                            break;
                        }
                        if (exec->transcount >= atom->min) {
                            int transno = exec->transno;
                            xmlRegStatePtr state = exec->state;

                            /*
                             * The transition is acceptable save it
                             */
                            /* -1 so the saved nextbranch (transno + 1) is 0 */
                            exec->transno = -1; /* trick */
                            exec->state = to;
                            xmlFARegExecSave(exec);
                            exec->transno = transno;
                            exec->state = state;
                        }
                        codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
                                              len);
                        ret = xmlRegCheckCharacter(atom, codepoint);
                        exec->transcount++;
                    } while (ret == 1);
                    if (exec->transcount < atom->min)
                        ret = 0;

                    /*
                     * If the last check failed but one transition was found
                     * possible, rollback
                     */
                    if (ret < 0)
                        ret = 0;
                    if (ret == 0) {
                        goto rollback;
                    }
                }
            }
            if (ret == 1) {
                /* keep a rollback point if other transitions remain untried */
                if (exec->state->nbTrans > exec->transno + 1) {
                    xmlFARegExecSave(exec);
                }
                if (trans->counter >= 0) {
#ifdef DEBUG_REGEXP_EXEC
                    printf("Increasing count %d\n", trans->counter);
#endif
                    exec->counts[trans->counter]++;
                }
#ifdef DEBUG_REGEXP_EXEC
                printf("entering state %d\n", trans->to);
#endif
                exec->state = comp->states[trans->to];
                exec->transno = 0;
                /* only transitions carrying an atom consume input */
                if (trans->atom != NULL) {
                    exec->index += len;
                }
                goto progress;
            } else if (ret < 0) {
                exec->status = -4;
                break;
            }
        }
        if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
             * Failed to find a way out
             */
            exec->determinist = 0;
            xmlFARegExecRollBack(exec);
        }
progress:
        continue;
    }
    if (exec->rollbacks != NULL) {
        if (exec->counts != NULL) {
            int i;

            /* every allocated entry may own a counts buffer */
            for (i = 0;i < exec->maxRollbacks;i++)
                if (exec->rollbacks[i].counts != NULL)
                    xmlFree(exec->rollbacks[i].counts);
        }
        xmlFree(exec->rollbacks);
    }
    if (exec->counts != NULL)
        xmlFree(exec->counts);
    if (exec->status == 0)
        return(1);
    /* -1 is set by a rollback on an empty stack: plain non-match */
    if (exec->status == -1)
        return(0);
    return(exec->status);
}

/************************************************************************
 *                                                                      *
 *    Progressive interface to the verifier one atom at a time          *
 *                                                                      *
 ************************************************************************/

/**
 * xmlRegNewExecCtxt:
 * @comp: a precompiled regular expression
 * @callback: a callback function used for handling progresses in the
 *            automata matching phase
 * @data: the context data associated to the callback in this context
 *
 * Build a context used for progressive evaluation of a regexp.
+ * + * Returns the new context + */ +xmlRegExecCtxtPtr +xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { + xmlRegExecCtxtPtr exec; + + if (comp == NULL) + return(NULL); + exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt)); + if (exec == NULL) { + return(NULL); + } + memset(exec, 0, sizeof(xmlRegExecCtxt)); + exec->inputString = NULL; + exec->index = 0; + exec->determinist = 1; + exec->maxRollbacks = 0; + exec->nbRollbacks = 0; + exec->rollbacks = NULL; + exec->status = 0; + exec->comp = comp; + if (comp->compact == NULL) + exec->state = comp->states[0]; + exec->transno = 0; + exec->transcount = 0; + exec->callback = callback; + exec->data = data; + if (comp->nbCounters > 0) { + exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); + if (exec->counts == NULL) { + xmlFree(exec); + return(NULL); + } + memset(exec->counts, 0, comp->nbCounters * sizeof(int)); + } else + exec->counts = NULL; + exec->inputStackMax = 0; + exec->inputStackNr = 0; + exec->inputStack = NULL; + return(exec); +} + +/** + * xmlRegFreeExecCtxt: + * @exec: a regular expression evaulation context + * + * Free the structures associated to a regular expression evaulation context. 
+ */ +void +xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { + if (exec == NULL) + return; + + if (exec->rollbacks != NULL) { + if (exec->counts != NULL) { + int i; + + for (i = 0;i < exec->maxRollbacks;i++) + if (exec->rollbacks[i].counts != NULL) + xmlFree(exec->rollbacks[i].counts); + } + xmlFree(exec->rollbacks); + } + if (exec->counts != NULL) + xmlFree(exec->counts); + if (exec->inputStack != NULL) { + int i; + + for (i = 0;i < exec->inputStackNr;i++) { + if (exec->inputStack[i].value != NULL) + xmlFree(exec->inputStack[i].value); + } + xmlFree(exec->inputStack); + } + xmlFree(exec); +} + +static void +xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, + void *data) { +#ifdef DEBUG_PUSH + printf("saving value: %d:%s\n", exec->inputStackNr, value); +#endif + if (exec->inputStackMax == 0) { + exec->inputStackMax = 4; + exec->inputStack = (xmlRegInputTokenPtr) + xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); + if (exec->inputStack == NULL) { + fprintf(stderr, "push input: allocation failed"); + exec->inputStackMax = 0; + return; + } + } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { + xmlRegInputTokenPtr tmp; + + exec->inputStackMax *= 2; + tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, + exec->inputStackMax * sizeof(xmlRegInputToken)); + if (tmp == NULL) { + fprintf(stderr, "push input: allocation failed"); + exec->inputStackMax /= 2; + return; + } + exec->inputStack = tmp; + } + exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); + exec->inputStack[exec->inputStackNr].data = data; + exec->inputStackNr++; + exec->inputStack[exec->inputStackNr].value = NULL; + exec->inputStack[exec->inputStackNr].data = NULL; +} + + +/** + * xmlRegCompactPushString: + * @exec: a regexp execution context + * @comp: the precompiled exec with a compact table + * @value: a string token input + * @data: data associated to the token to reuse in callbacks + * + * Push one input token in the execution context + * + * 
Returns: 1 if the regexp reached a final state, 0 if non-final, and + * a negative value in case of error. + */ +static int +xmlRegCompactPushString(xmlRegExecCtxtPtr exec, + xmlRegexpPtr comp, + const xmlChar *value, + void *data) { + int state = exec->index; + int i, target; + + if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) + return(-1); + + if (value == NULL) { + /* + * are we at a final state ? + */ + if (comp->compact[state * (comp->nbstrings + 1)] == + XML_REGEXP_FINAL_STATE) + return(1); + return(0); + } + +#ifdef DEBUG_PUSH + printf("value pushed: %s\n", value); +#endif + + /* + * Examine all outside transition from current state + */ + for (i = 0;i < comp->nbstrings;i++) { + target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; + if ((target > 0) && (target <= comp->nbstates)) { + target--; /* to avoid 0 */ + if (xmlStrEqual(comp->stringMap[i], value)) { + exec->index = target; + if ((exec->callback != NULL) && (comp->transdata != NULL)) { + exec->callback(exec->data, value, + comp->transdata[state * comp->nbstrings + i], data); + } +#ifdef DEBUG_PUSH + printf("entering state %d\n", target); +#endif + if (comp->compact[target * (comp->nbstrings + 1)] == + XML_REGEXP_FINAL_STATE) + return(1); + return(0); + } + } + } + /* + * Failed to find an exit transition out from current state for the + * current token + */ +#ifdef DEBUG_PUSH + printf("failed to find a transition for %s on state %d\n", value, state); +#endif + exec->status = -1; + return(-1); +} + +/** + * xmlRegExecPushString: + * @exec: a regexp execution context + * @value: a string token input + * @data: data associated to the token to reuse in callbacks + * + * Push one input token in the execution context + * + * Returns: 1 if the regexp reached a final state, 0 if non-final, and + * a negative value in case of error. 
+ */ +int +xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, + void *data) { + xmlRegTransPtr trans; + xmlRegAtomPtr atom; + int ret; + int final = 0; + + if (exec == NULL) + return(-1); + if (exec->comp == NULL) + return(-1); + if (exec->status != 0) + return(exec->status); + + if (exec->comp->compact != NULL) + return(xmlRegCompactPushString(exec, exec->comp, value, data)); + + if (value == NULL) { + if (exec->state->type == XML_REGEXP_FINAL_STATE) + return(1); + final = 1; + } + +#ifdef DEBUG_PUSH + printf("value pushed: %s\n", value); +#endif + /* + * If we have an active rollback stack push the new value there + * and get back to where we were left + */ + if ((value != NULL) && (exec->inputStackNr > 0)) { + xmlFARegExecSaveInputString(exec, value, data); + value = exec->inputStack[exec->index].value; + data = exec->inputStack[exec->index].data; +#ifdef DEBUG_PUSH + printf("value loaded: %s\n", value); +#endif + } + + while ((exec->status == 0) && + ((value != NULL) || + ((final == 1) && + (exec->state->type != XML_REGEXP_FINAL_STATE)))) { + + /* + * End of input on non-terminal state, rollback, however we may + * still have epsilon like transition for counted transitions + * on counters, in that case don't break too early. 
+ */ + if ((value == NULL) && (exec->counts == NULL)) + goto rollback; + + exec->transcount = 0; + for (;exec->transno < exec->state->nbTrans;exec->transno++) { + trans = &exec->state->trans[exec->transno]; + if (trans->to < 0) + continue; + atom = trans->atom; + ret = 0; + if (trans->count == REGEXP_ALL_LAX_COUNTER) { + int i; + int count; + xmlRegTransPtr t; + xmlRegCounterPtr counter; + + ret = 0; + +#ifdef DEBUG_PUSH + printf("testing all lax %d\n", trans->count); +#endif + /* + * Check all counted transitions from the current state + */ + if ((value == NULL) && (final)) { + ret = 1; + } else if (value != NULL) { + for (i = 0;i < exec->state->nbTrans;i++) { + t = &exec->state->trans[i]; + if ((t->counter < 0) || (t == trans)) + continue; + counter = &exec->comp->counters[t->counter]; + count = exec->counts[t->counter]; + if ((count < counter->max) && + (t->atom != NULL) && + (xmlStrEqual(value, t->atom->valuep))) { + ret = 0; + break; + } + if ((count >= counter->min) && + (count < counter->max) && + (xmlStrEqual(value, t->atom->valuep))) { + ret = 1; + break; + } + } + } + } else if (trans->count == REGEXP_ALL_COUNTER) { + int i; + int count; + xmlRegTransPtr t; + xmlRegCounterPtr counter; + + ret = 1; + +#ifdef DEBUG_PUSH + printf("testing all %d\n", trans->count); +#endif + /* + * Check all counted transitions from the current state + */ + for (i = 0;i < exec->state->nbTrans;i++) { + t = &exec->state->trans[i]; + if ((t->counter < 0) || (t == trans)) + continue; + counter = &exec->comp->counters[t->counter]; + count = exec->counts[t->counter]; + if ((count < counter->min) || (count > counter->max)) { + ret = 0; + break; + } + } + } else if (trans->count >= 0) { + int count; + xmlRegCounterPtr counter; + + /* + * A counted transition. 
+ */ + + count = exec->counts[trans->count]; + counter = &exec->comp->counters[trans->count]; +#ifdef DEBUG_PUSH + printf("testing count %d: val %d, min %d, max %d\n", + trans->count, count, counter->min, counter->max); +#endif + ret = ((count >= counter->min) && (count <= counter->max)); + } else if (atom == NULL) { + fprintf(stderr, "epsilon transition left at runtime\n"); + exec->status = -2; + break; + } else if (value != NULL) { + ret = xmlStrEqual(value, atom->valuep); + if ((ret == 1) && (trans->counter >= 0)) { + xmlRegCounterPtr counter; + int count; + + count = exec->counts[trans->counter]; + counter = &exec->comp->counters[trans->counter]; + if (count >= counter->max) + ret = 0; + } + + if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { + xmlRegStatePtr to = exec->comp->states[trans->to]; + + /* + * this is a multiple input sequence + */ + if (exec->state->nbTrans > exec->transno + 1) { + if (exec->inputStackNr <= 0) { + xmlFARegExecSaveInputString(exec, value, data); + } + xmlFARegExecSave(exec); + } + exec->transcount = 1; + do { + /* + * Try to progress as much as possible on the input + */ + if (exec->transcount == atom->max) { + break; + } + exec->index++; + value = exec->inputStack[exec->index].value; + data = exec->inputStack[exec->index].data; +#ifdef DEBUG_PUSH + printf("value loaded: %s\n", value); +#endif + + /* + * End of input: stop here + */ + if (value == NULL) { + exec->index --; + break; + } + if (exec->transcount >= atom->min) { + int transno = exec->transno; + xmlRegStatePtr state = exec->state; + + /* + * The transition is acceptable save it + */ + exec->transno = -1; /* trick */ + exec->state = to; + if (exec->inputStackNr <= 0) { + xmlFARegExecSaveInputString(exec, value, data); + } + xmlFARegExecSave(exec); + exec->transno = transno; + exec->state = state; + } + ret = xmlStrEqual(value, atom->valuep); + exec->transcount++; + } while (ret == 1); + if (exec->transcount < atom->min) + ret = 0; + + /* + * If the last check failed 
but one transition was found + * possible, rollback + */ + if (ret < 0) + ret = 0; + if (ret == 0) { + goto rollback; + } + } + } + if (ret == 1) { + if ((exec->callback != NULL) && (atom != NULL)) { + exec->callback(exec->data, atom->valuep, + atom->data, data); + } + if (exec->state->nbTrans > exec->transno + 1) { + if (exec->inputStackNr <= 0) { + xmlFARegExecSaveInputString(exec, value, data); + } + xmlFARegExecSave(exec); + } + if (trans->counter >= 0) { +#ifdef DEBUG_PUSH + printf("Increasing count %d\n", trans->counter); +#endif + exec->counts[trans->counter]++; + } +#ifdef DEBUG_PUSH + printf("entering state %d\n", trans->to); +#endif + exec->state = exec->comp->states[trans->to]; + exec->transno = 0; + if (trans->atom != NULL) { + if (exec->inputStack != NULL) { + exec->index++; + if (exec->index < exec->inputStackNr) { + value = exec->inputStack[exec->index].value; + data = exec->inputStack[exec->index].data; +#ifdef DEBUG_PUSH + printf("value loaded: %s\n", value); +#endif + } else { + value = NULL; + data = NULL; +#ifdef DEBUG_PUSH + printf("end of input\n"); +#endif + } + } else { + value = NULL; + data = NULL; +#ifdef DEBUG_PUSH + printf("end of input\n"); +#endif + } + } + goto progress; + } else if (ret < 0) { + exec->status = -4; + break; + } + } + if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { +rollback: + /* + * Failed to find a way out + */ + exec->determinist = 0; + xmlFARegExecRollBack(exec); + if (exec->status == 0) { + value = exec->inputStack[exec->index].value; + data = exec->inputStack[exec->index].data; +#ifdef DEBUG_PUSH + printf("value loaded: %s\n", value); +#endif + } + } +progress: + continue; + } + if (exec->status == 0) { + return(exec->state->type == XML_REGEXP_FINAL_STATE); + } + return(exec->status); +} + +#if 0 +static int +xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { + xmlRegTransPtr trans; + xmlRegAtomPtr atom; + int ret; + int codepoint, len; + + if (exec == NULL) + return(-1); + if (exec->status != 0) 
+ return(exec->status); + + while ((exec->status == 0) && + ((exec->inputString[exec->index] != 0) || + (exec->state->type != XML_REGEXP_FINAL_STATE))) { + + /* + * End of input on non-terminal state, rollback, however we may + * still have epsilon like transition for counted transitions + * on counters, in that case don't break too early. + */ + if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) + goto rollback; + + exec->transcount = 0; + for (;exec->transno < exec->state->nbTrans;exec->transno++) { + trans = &exec->state->trans[exec->transno]; + if (trans->to < 0) + continue; + atom = trans->atom; + ret = 0; + if (trans->count >= 0) { + int count; + xmlRegCounterPtr counter; + + /* + * A counted transition. + */ + + count = exec->counts[trans->count]; + counter = &exec->comp->counters[trans->count]; +#ifdef DEBUG_REGEXP_EXEC + printf("testing count %d: val %d, min %d, max %d\n", + trans->count, count, counter->min, counter->max); +#endif + ret = ((count >= counter->min) && (count <= counter->max)); + } else if (atom == NULL) { + fprintf(stderr, "epsilon transition left at runtime\n"); + exec->status = -2; + break; + } else if (exec->inputString[exec->index] != 0) { + codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); + ret = xmlRegCheckCharacter(atom, codepoint); + if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { + xmlRegStatePtr to = exec->comp->states[trans->to]; + + /* + * this is a multiple input sequence + */ + if (exec->state->nbTrans > exec->transno + 1) { + xmlFARegExecSave(exec); + } + exec->transcount = 1; + do { + /* + * Try to progress as much as possible on the input + */ + if (exec->transcount == atom->max) { + break; + } + exec->index += len; + /* + * End of input: stop here + */ + if (exec->inputString[exec->index] == 0) { + exec->index -= len; + break; + } + if (exec->transcount >= atom->min) { + int transno = exec->transno; + xmlRegStatePtr state = exec->state; + + /* + * The transition is acceptable save it 
+ */ + exec->transno = -1; /* trick */ + exec->state = to; + xmlFARegExecSave(exec); + exec->transno = transno; + exec->state = state; + } + codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), + len); + ret = xmlRegCheckCharacter(atom, codepoint); + exec->transcount++; + } while (ret == 1); + if (exec->transcount < atom->min) + ret = 0; + + /* + * If the last check failed but one transition was found + * possible, rollback + */ + if (ret < 0) + ret = 0; + if (ret == 0) { + goto rollback; + } + } + } + if (ret == 1) { + if (exec->state->nbTrans > exec->transno + 1) { + xmlFARegExecSave(exec); + } + if (trans->counter >= 0) { +#ifdef DEBUG_REGEXP_EXEC + printf("Increasing count %d\n", trans->counter); +#endif + exec->counts[trans->counter]++; + } +#ifdef DEBUG_REGEXP_EXEC + printf("entering state %d\n", trans->to); +#endif + exec->state = exec->comp->states[trans->to]; + exec->transno = 0; + if (trans->atom != NULL) { + exec->index += len; + } + goto progress; + } else if (ret < 0) { + exec->status = -4; + break; + } + } + if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { +rollback: + /* + * Failed to find a way out + */ + exec->determinist = 0; + xmlFARegExecRollBack(exec); + } +progress: + continue; + } +} +#endif +/************************************************************************ + * * + * Parser for the Shemas Datatype Regular Expressions * + * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * + * * + ************************************************************************/ + +/** + * xmlFAIsChar: + * @ctxt: a regexp parser context + * + * [10] Char ::= [^.\?*+()|#x5B#x5D] + */ +static int +xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { + int cur; + int len; + + cur = CUR_SCHAR(ctxt->cur, len); + if ((cur == '.') || (cur == '\\') || (cur == '?') || + (cur == '*') || (cur == '+') || (cur == '(') || + (cur == ')') || (cur == '|') || (cur == 0x5B) || + (cur == 0x5D) || (cur == 0)) + return(-1); + return(cur); +} + +/** + * 
xmlFAParseCharProp: + * @ctxt: a regexp parser context + * + * [27] charProp ::= IsCategory | IsBlock + * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | + * Separators | Symbols | Others + * [29] Letters ::= 'L' [ultmo]? + * [30] Marks ::= 'M' [nce]? + * [31] Numbers ::= 'N' [dlo]? + * [32] Punctuation ::= 'P' [cdseifo]? + * [33] Separators ::= 'Z' [slp]? + * [34] Symbols ::= 'S' [mcko]? + * [35] Others ::= 'C' [cfon]? + * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ + */ +static void +xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { + int cur; + xmlRegAtomType type = 0; + xmlChar *blockName = NULL; + + cur = CUR; + if (cur == 'L') { + NEXT; + cur = CUR; + if (cur == 'u') { + NEXT; + type = XML_REGEXP_LETTER_UPPERCASE; + } else if (cur == 'l') { + NEXT; + type = XML_REGEXP_LETTER_LOWERCASE; + } else if (cur == 't') { + NEXT; + type = XML_REGEXP_LETTER_TITLECASE; + } else if (cur == 'm') { + NEXT; + type = XML_REGEXP_LETTER_MODIFIER; + } else if (cur == 'o') { + NEXT; + type = XML_REGEXP_LETTER_OTHERS; + } else { + type = XML_REGEXP_LETTER; + } + } else if (cur == 'M') { + NEXT; + cur = CUR; + if (cur == 'n') { + NEXT; + /* nonspacing */ + type = XML_REGEXP_MARK_NONSPACING; + } else if (cur == 'c') { + NEXT; + /* spacing combining */ + type = XML_REGEXP_MARK_SPACECOMBINING; + } else if (cur == 'e') { + NEXT; + /* enclosing */ + type = XML_REGEXP_MARK_ENCLOSING; + } else { + /* all marks */ + type = XML_REGEXP_MARK; + } + } else if (cur == 'N') { + NEXT; + cur = CUR; + if (cur == 'd') { + NEXT; + /* digital */ + type = XML_REGEXP_NUMBER_DECIMAL; + } else if (cur == 'l') { + NEXT; + /* letter */ + type = XML_REGEXP_NUMBER_LETTER; + } else if (cur == 'o') { + NEXT; + /* other */ + type = XML_REGEXP_NUMBER_OTHERS; + } else { + /* all numbers */ + type = XML_REGEXP_NUMBER; + } + } else if (cur == 'P') { + NEXT; + cur = CUR; + if (cur == 'c') { + NEXT; + /* connector */ + type = XML_REGEXP_PUNCT_CONNECTOR; + } else if (cur == 'd') { + NEXT; + /* dash */ + type = 
XML_REGEXP_PUNCT_DASH; + } else if (cur == 's') { + NEXT; + /* open */ + type = XML_REGEXP_PUNCT_OPEN; + } else if (cur == 'e') { + NEXT; + /* close */ + type = XML_REGEXP_PUNCT_CLOSE; + } else if (cur == 'i') { + NEXT; + /* initial quote */ + type = XML_REGEXP_PUNCT_INITQUOTE; + } else if (cur == 'f') { + NEXT; + /* final quote */ + type = XML_REGEXP_PUNCT_FINQUOTE; + } else if (cur == 'o') { + NEXT; + /* other */ + type = XML_REGEXP_PUNCT_OTHERS; + } else { + /* all punctuation */ + type = XML_REGEXP_PUNCT; + } + } else if (cur == 'Z') { + NEXT; + cur = CUR; + if (cur == 's') { + NEXT; + /* space */ + type = XML_REGEXP_SEPAR_SPACE; + } else if (cur == 'l') { + NEXT; + /* line */ + type = XML_REGEXP_SEPAR_LINE; + } else if (cur == 'p') { + NEXT; + /* paragraph */ + type = XML_REGEXP_SEPAR_PARA; + } else { + /* all separators */ + type = XML_REGEXP_SEPAR; + } + } else if (cur == 'S') { + NEXT; + cur = CUR; + if (cur == 'm') { + NEXT; + type = XML_REGEXP_SYMBOL_MATH; + /* math */ + } else if (cur == 'c') { + NEXT; + type = XML_REGEXP_SYMBOL_CURRENCY; + /* currency */ + } else if (cur == 'k') { + NEXT; + type = XML_REGEXP_SYMBOL_MODIFIER; + /* modifiers */ + } else if (cur == 'o') { + NEXT; + type = XML_REGEXP_SYMBOL_OTHERS; + /* other */ + } else { + /* all symbols */ + type = XML_REGEXP_SYMBOL; + } + } else if (cur == 'C') { + NEXT; + cur = CUR; + if (cur == 'c') { + NEXT; + /* control */ + type = XML_REGEXP_OTHER_CONTROL; + } else if (cur == 'f') { + NEXT; + /* format */ + type = XML_REGEXP_OTHER_FORMAT; + } else if (cur == 'o') { + NEXT; + /* private use */ + type = XML_REGEXP_OTHER_PRIVATE; + } else if (cur == 'n') { + NEXT; + /* not assigned */ + type = XML_REGEXP_OTHER_NA; + } else { + /* all others */ + type = XML_REGEXP_OTHER; + } + } else if (cur == 'I') { + const xmlChar *start; + NEXT; + cur = CUR; + if (cur != 's') { + ERROR("IsXXXX expected"); + return; + } + NEXT; + start = ctxt->cur; + cur = CUR; + if (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') 
&& (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == 0x2D)) { + NEXT; + cur = CUR; + while (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == 0x2D)) { + NEXT; + cur = CUR; + } + } + type = XML_REGEXP_BLOCK_NAME; + blockName = xmlStrndup(start, ctxt->cur - start); + } else { + ERROR("Unknown char property"); + return; + } + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, type); + if (ctxt->atom != NULL) + ctxt->atom->valuep = blockName; + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + type, 0, 0, blockName); + } +} + +/** + * xmlFAParseCharClassEsc: + * @ctxt: a regexp parser context + * + * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) + * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] + * [25] catEsc ::= '\p{' charProp '}' + * [26] complEsc ::= '\P{' charProp '}' + * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) + */ +static void +xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { + int cur; + + if (CUR == '.') { + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_ANYCHAR, 0, 0, NULL); + } + NEXT; + return; + } + if (CUR != '\\') { + ERROR("Escaped sequence: expecting \\"); + return; + } + NEXT; + cur = CUR; + if (cur == 'p') { + NEXT; + if (CUR != '{') { + ERROR("Expecting '{'"); + return; + } + NEXT; + xmlFAParseCharProp(ctxt); + if (CUR != '}') { + ERROR("Expecting '}'"); + return; + } + NEXT; + } else if (cur == 'P') { + NEXT; + if (CUR != '{') { + ERROR("Expecting '{'"); + return; + } + NEXT; + xmlFAParseCharProp(ctxt); + ctxt->atom->neg = 1; + if (CUR != '}') { + ERROR("Expecting '}'"); + return; + } + NEXT; + } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || + (cur == '|') || (cur == '.') || (cur 
== '?') || (cur == '*') || + (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || + (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || + (cur == 0x5E)) { + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); + if (ctxt->atom != NULL) + ctxt->atom->codepoint = cur; + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, cur, cur, NULL); + } + NEXT; + } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || + (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || + (cur == 'w') || (cur == 'W')) { + xmlRegAtomType type = XML_REGEXP_ANYSPACE; + + switch (cur) { + case 's': + type = XML_REGEXP_ANYSPACE; + break; + case 'S': + type = XML_REGEXP_NOTSPACE; + break; + case 'i': + type = XML_REGEXP_INITNAME; + break; + case 'I': + type = XML_REGEXP_NOTINITNAME; + break; + case 'c': + type = XML_REGEXP_NAMECHAR; + break; + case 'C': + type = XML_REGEXP_NOTNAMECHAR; + break; + case 'd': + type = XML_REGEXP_DECIMAL; + break; + case 'D': + type = XML_REGEXP_NOTDECIMAL; + break; + case 'w': + type = XML_REGEXP_REALCHAR; + break; + case 'W': + type = XML_REGEXP_NOTREALCHAR; + break; + } + NEXT; + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, type); + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + type, 0, 0, NULL); + } + } +} + +/** + * xmlFAParseCharRef: + * @ctxt: a regexp parser context + * + * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' ) + */ +static int +xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) { + int ret = 0, cur; + + if ((CUR != '&') || (NXT(1) != '#')) + return(-1); + NEXT; + NEXT; + cur = CUR; + if (cur == 'x') { + NEXT; + cur = CUR; + if (((cur >= '0') && (cur <= '9')) || + ((cur >= 'a') && (cur <= 'f')) || + ((cur >= 'A') && (cur <= 'F'))) { + while (((cur >= '0') && (cur <= '9')) || + ((cur >= 'A') && (cur <= 'F'))) { + 
if ((cur >= '0') && (cur <= '9')) + ret = ret * 16 + cur - '0'; + else if ((cur >= 'a') && (cur <= 'f')) + ret = ret * 16 + 10 + (cur - 'a'); + else + ret = ret * 16 + 10 + (cur - 'A'); + NEXT; + cur = CUR; + } + } else { + ERROR("Char ref: expecting [0-9A-F]"); + return(-1); + } + } else { + if ((cur >= '0') && (cur <= '9')) { + while ((cur >= '0') && (cur <= '9')) { + ret = ret * 10 + cur - '0'; + NEXT; + cur = CUR; + } + } else { + ERROR("Char ref: expecting [0-9]"); + return(-1); + } + } + if (cur != ';') { + ERROR("Char ref: expecting ';'"); + return(-1); + } else { + NEXT; + } + return(ret); +} + +/** + * xmlFAParseCharRange: + * @ctxt: a regexp parser context + * + * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash + * [18] seRange ::= charOrEsc '-' charOrEsc + * [20] charOrEsc ::= XmlChar | SingleCharEsc + * [21] XmlChar ::= [^\#x2D#x5B#x5D] + * [22] XmlCharIncDash ::= [^\#x5B#x5D] + */ +static void +xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { + int cur; + int start = -1; + int end = -1; + + if ((CUR == '&') && (NXT(1) == '#')) { + end = start = xmlFAParseCharRef(ctxt); + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + return; + } + cur = CUR; + if (cur == '\\') { + NEXT; + cur = CUR; + switch (cur) { + case 'n': start = 0xA; break; + case 'r': start = 0xD; break; + case 't': start = 0x9; break; + case '\\': case '|': case '.': case '-': case '^': case '?': + case '*': case '+': case '{': case '}': case '(': case ')': + case '[': case ']': + start = cur; break; + default: + ERROR("Invalid escape value"); + return; + } + end = start; + } else if ((cur != 0x5B) && (cur != 0x5D)) { + end = start = cur; + } else { + ERROR("Expecting a char range"); + return; + } + NEXT; + if (start == '-') { + return; + } + cur = CUR; + if (cur != '-') { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + return; + } + NEXT; + cur = CUR; + if (cur == '\\') { + NEXT; + cur = CUR; + 
switch (cur) { + case 'n': end = 0xA; break; + case 'r': end = 0xD; break; + case 't': end = 0x9; break; + case '\\': case '|': case '.': case '-': case '^': case '?': + case '*': case '+': case '{': case '}': case '(': case ')': + case '[': case ']': + end = cur; break; + default: + ERROR("Invalid escape value"); + return; + } + } else if ((cur != 0x5B) && (cur != 0x5D)) { + end = cur; + } else { + ERROR("Expecting the end of a char range"); + return; + } + NEXT; + /* TODO check that the values are acceptable character ranges for XML */ + if (end < start) { + ERROR("End of range is before start of range"); + } else { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + } + return; +} + +/** + * xmlFAParsePosCharGroup: + * @ctxt: a regexp parser context + * + * [14] posCharGroup ::= ( charRange | charClassEsc )+ + */ +static void +xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { + do { + if ((CUR == '\\') || (CUR == '.')) { + xmlFAParseCharClassEsc(ctxt); + } else { + xmlFAParseCharRange(ctxt); + } + } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && + (ctxt->error == 0)); +} + +/** + * xmlFAParseCharGroup: + * @ctxt: a regexp parser context + * + * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub + * [15] negCharGroup ::= '^' posCharGroup + * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr + * [12] charClassExpr ::= '[' charGroup ']' + */ +static void +xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { + int n = ctxt->neg; + while ((CUR != ']') && (ctxt->error == 0)) { + if (CUR == '^') { + int neg = ctxt->neg; + + NEXT; + ctxt->neg = !ctxt->neg; + xmlFAParsePosCharGroup(ctxt); + ctxt->neg = neg; + } else if (CUR == '-') { + NEXT; + ctxt->neg = !ctxt->neg; + if (CUR != '[') { + ERROR("charClassExpr: '[' expected"); + break; + } + NEXT; + xmlFAParseCharGroup(ctxt); + if (CUR == ']') { + NEXT; + } else { + ERROR("charClassExpr: ']' expected"); + break; + } + break; + } else if (CUR != 
']') { + xmlFAParsePosCharGroup(ctxt); + } + } + ctxt->neg = n; +} + +/** + * xmlFAParseCharClass: + * @ctxt: a regexp parser context + * + * [11] charClass ::= charClassEsc | charClassExpr + * [12] charClassExpr ::= '[' charGroup ']' + */ +static void +xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { + if (CUR == '[') { + NEXT; + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); + if (ctxt->atom == NULL) + return; + xmlFAParseCharGroup(ctxt); + if (CUR == ']') { + NEXT; + } else { + ERROR("xmlFAParseCharClass: ']' expected"); + } + } else { + xmlFAParseCharClassEsc(ctxt); + } +} + +/** + * xmlFAParseQuantExact: + * @ctxt: a regexp parser context + * + * [8] QuantExact ::= [0-9]+ + */ +static int +xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { + int ret = 0; + int ok = 0; + + while ((CUR >= '0') && (CUR <= '9')) { + ret = ret * 10 + (CUR - '0'); + ok = 1; + NEXT; + } + if (ok != 1) { + return(-1); + } + return(ret); +} + +/** + * xmlFAParseQuantifier: + * @ctxt: a regexp parser context + * + * [4] quantifier ::= [?*+] | ( '{' quantity '}' ) + * [5] quantity ::= quantRange | quantMin | QuantExact + * [6] quantRange ::= QuantExact ',' QuantExact + * [7] quantMin ::= QuantExact ',' + * [8] QuantExact ::= [0-9]+ + */ +static int +xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { + int cur; + + cur = CUR; + if ((cur == '?') || (cur == '*') || (cur == '+')) { + if (ctxt->atom != NULL) { + if (cur == '?') + ctxt->atom->quant = XML_REGEXP_QUANT_OPT; + else if (cur == '*') + ctxt->atom->quant = XML_REGEXP_QUANT_MULT; + else if (cur == '+') + ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; + } + NEXT; + return(1); + } + if (cur == '{') { + int min = 0, max = 0; + + NEXT; + cur = xmlFAParseQuantExact(ctxt); + if (cur >= 0) + min = cur; + if (CUR == ',') { + NEXT; + cur = xmlFAParseQuantExact(ctxt); + if (cur >= 0) + max = cur; + } + if (CUR == '}') { + NEXT; + } else { + ERROR("Unterminated quantifier"); + } + if (max == 0) + max = min; + if (ctxt->atom != NULL) { + 
ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; + ctxt->atom->min = min; + ctxt->atom->max = max; + } + return(1); + } + return(0); +} + +/** + * xmlFAParseAtom: + * @ctxt: a regexp parser context + * + * [9] atom ::= Char | charClass | ( '(' regExp ')' ) + */ +static int +xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { + int codepoint, len; + + codepoint = xmlFAIsChar(ctxt); + if (codepoint > 0) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); + if (ctxt->atom == NULL) + return(-1); + codepoint = CUR_SCHAR(ctxt->cur, len); + ctxt->atom->codepoint = codepoint; + NEXTL(len); + return(1); + } else if (CUR == '|') { + return(0); + } else if (CUR == 0) { + return(0); + } else if (CUR == ')') { + return(0); + } else if (CUR == '(') { + xmlRegStatePtr start, oldend; + + NEXT; + xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); + start = ctxt->state; + oldend = ctxt->end; + ctxt->end = NULL; + ctxt->atom = NULL; + xmlFAParseRegExp(ctxt, 0); + if (CUR == ')') { + NEXT; + } else { + ERROR("xmlFAParseAtom: expecting ')'"); + } + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); + if (ctxt->atom == NULL) + return(-1); + ctxt->atom->start = start; + ctxt->atom->stop = ctxt->state; + ctxt->end = oldend; + return(1); + } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { + xmlFAParseCharClass(ctxt); + return(1); + } + return(0); +} + +/** + * xmlFAParsePiece: + * @ctxt: a regexp parser context + * + * [3] piece ::= atom quantifier? 
+ */ +static int +xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { + int ret; + + ctxt->atom = NULL; + ret = xmlFAParseAtom(ctxt); + if (ret == 0) + return(0); + if (ctxt->atom == NULL) { + ERROR("internal: no atom generated"); + } + xmlFAParseQuantifier(ctxt); + return(1); +} + +/** + * xmlFAParseBranch: + * @ctxt: a regexp parser context + * @first: is taht the first + * + * [2] branch ::= piece* + */ +static void +xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) { + xmlRegStatePtr previous; + xmlRegAtomPtr prevatom = NULL; + int ret; + + previous = ctxt->state; + ret = xmlFAParsePiece(ctxt); + if (ret != 0) { + if (first) { + xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom); + previous = ctxt->state; + } else { + prevatom = ctxt->atom; + } + ctxt->atom = NULL; + } + while ((ret != 0) && (ctxt->error == 0)) { + ret = xmlFAParsePiece(ctxt); + if (ret != 0) { + if (first) { + xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom); + } else { + xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom); + prevatom = ctxt->atom; + } + previous = ctxt->state; + ctxt->atom = NULL; + } + } + if (!first) { + xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom); + } +} + +/** + * xmlFAParseRegExp: + * @ctxt: a regexp parser context + * @top: is that the top-level expressions ? 
+ * + * [1] regExp ::= branch ( '|' branch )* + */ +static void +xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { + xmlRegStatePtr start, end, oldend; + + oldend = ctxt->end; + + start = ctxt->state; + xmlFAParseBranch(ctxt, (ctxt->end == NULL)); + if (CUR != '|') { + ctxt->end = ctxt->state; + return; + } + end = ctxt->state; + while ((CUR == '|') && (ctxt->error == 0)) { + NEXT; + ctxt->state = start; + ctxt->end = end; + xmlFAParseBranch(ctxt, 0); + } + if (!top) + ctxt->end = oldend; +} + +/************************************************************************ + * * + * The basic API * + * * + ************************************************************************/ + +/** + * xmlRegexpPrint: + * @output: the file for the output debug + * @regexp: the compiled regexp + * + * Print the content of the compiled regular expression + */ +void +xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) { + int i; + + fprintf(output, " regexp: "); + if (regexp == NULL) { + fprintf(output, "NULL\n"); + return; + } + fprintf(output, "'%s' ", regexp->string); + fprintf(output, "\n"); + fprintf(output, "%d atoms:\n", regexp->nbAtoms); + for (i = 0;i < regexp->nbAtoms; i++) { + fprintf(output, " %02d ", i); + xmlRegPrintAtom(output, regexp->atoms[i]); + } + fprintf(output, "%d states:", regexp->nbStates); + fprintf(output, "\n"); + for (i = 0;i < regexp->nbStates; i++) { + xmlRegPrintState(output, regexp->states[i]); + } + fprintf(output, "%d counters:\n", regexp->nbCounters); + for (i = 0;i < regexp->nbCounters; i++) { + fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min, + regexp->counters[i].max); + } +} + +/** + * xmlRegexpCompile: + * @regexp: a regular expression string + * + * Parses a regular expression conforming to XML Schemas Part 2 Datatype + * Appendix F and build an automata suitable for testing strings against + * that regular expression + * + * Returns the compiled expression or NULL in case of error + */ +xmlRegexpPtr +xmlRegexpCompile(const 
xmlChar *regexp) { + xmlRegexpPtr ret; + xmlRegParserCtxtPtr ctxt; + + ctxt = xmlRegNewParserCtxt(regexp); + if (ctxt == NULL) + return(NULL); + + /* initialize the parser */ + ctxt->end = NULL; + ctxt->start = ctxt->state = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, ctxt->start); + + /* parse the expression building an automata */ + xmlFAParseRegExp(ctxt, 1); + if (CUR != 0) { + ERROR("xmlFAParseRegExp: extra characters"); + } + ctxt->end = ctxt->state; + ctxt->start->type = XML_REGEXP_START_STATE; + ctxt->end->type = XML_REGEXP_FINAL_STATE; + + /* remove the Epsilon except for counted transitions */ + xmlFAEliminateEpsilonTransitions(ctxt); + + + if (ctxt->error != 0) { + xmlRegFreeParserCtxt(ctxt); + return(NULL); + } + ret = xmlRegEpxFromParse(ctxt); + xmlRegFreeParserCtxt(ctxt); + return(ret); +} + +/** + * xmlRegexpExec: + * @comp: the compiled regular expression + * @content: the value to check against the regular expression + * + * Check if the regular expression generate the value + * + * Returns 1 if it matches, 0 if not and a negativa value in case of error + */ +int +xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { + if ((comp == NULL) || (content == NULL)) + return(-1); + return(xmlFARegExec(comp, content)); +} + +/** + * xmlRegexpIsDeterminist: + * @comp: the compiled regular expression + * + * Check if the regular expression is determinist + * + * Returns 1 if it yes, 0 if not and a negativa value in case of error + */ +int +xmlRegexpIsDeterminist(xmlRegexpPtr comp) { + xmlAutomataPtr am; + int ret; + + if (comp == NULL) + return(-1); + if (comp->determinist != -1) + return(comp->determinist); + + am = xmlNewAutomata(); + if (am->states != NULL) { + int i; + + for (i = 0;i < am->nbStates;i++) + xmlRegFreeState(am->states[i]); + xmlFree(am->states); + } + am->nbAtoms = comp->nbAtoms; + am->atoms = comp->atoms; + am->nbStates = comp->nbStates; + am->states = comp->states; + am->determinist = -1; + ret = xmlFAComputesDeterminism(am); + 
am->atoms = NULL; + am->states = NULL; + xmlFreeAutomata(am); + return(ret); +} + +/** + * xmlRegFreeRegexp: + * @regexp: the regexp + * + * Free a regexp + */ +void +xmlRegFreeRegexp(xmlRegexpPtr regexp) { + int i; + if (regexp == NULL) + return; + + if (regexp->string != NULL) + xmlFree(regexp->string); + if (regexp->states != NULL) { + for (i = 0;i < regexp->nbStates;i++) + xmlRegFreeState(regexp->states[i]); + xmlFree(regexp->states); + } + if (regexp->atoms != NULL) { + for (i = 0;i < regexp->nbAtoms;i++) + xmlRegFreeAtom(regexp->atoms[i]); + xmlFree(regexp->atoms); + } + if (regexp->counters != NULL) + xmlFree(regexp->counters); + if (regexp->compact != NULL) + xmlFree(regexp->compact); + if (regexp->transdata != NULL) + xmlFree(regexp->transdata); + if (regexp->stringMap != NULL) { + for (i = 0; i < regexp->nbstrings;i++) + xmlFree(regexp->stringMap[i]); + xmlFree(regexp->stringMap); + } + + xmlFree(regexp); +} + +#ifdef LIBXML_AUTOMATA_ENABLED +/************************************************************************ + * * + * The Automata interface * + * * + ************************************************************************/ + +/** + * xmlNewAutomata: + * + * Create a new automata + * + * Returns the new object or NULL in case of failure + */ +xmlAutomataPtr +xmlNewAutomata(void) { + xmlAutomataPtr ctxt; + + ctxt = xmlRegNewParserCtxt(NULL); + if (ctxt == NULL) + return(NULL); + + /* initialize the parser */ + ctxt->end = NULL; + ctxt->start = ctxt->state = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, ctxt->start); + + return(ctxt); +} + +/** + * xmlFreeAutomata: + * @am: an automata + * + * Free an automata + */ +void +xmlFreeAutomata(xmlAutomataPtr am) { + if (am == NULL) + return; + xmlRegFreeParserCtxt(am); +} + +/** + * xmlAutomataGetInitState: + * @am: an automata + * + * Initial state lookup + * + * Returns the initial state of the automata + */ +xmlAutomataStatePtr +xmlAutomataGetInitState(xmlAutomataPtr am) { + if (am == NULL) + 
return(NULL); + return(am->start); +} + +/** + * xmlAutomataSetFinalState: + * @am: an automata + * @state: a state in this automata + * + * Makes that state a final state + * + * Returns 0 or -1 in case of error + */ +int +xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) { + if ((am == NULL) || (state == NULL)) + return(-1); + state->type = XML_REGEXP_FINAL_STATE; + return(0); +} + +/** + * xmlAutomataNewTransition: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @token: the input string associated to that transition + * @data: data passed to the callback function if the transition is activated + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a transition from the @from state to the target state + * activated by the value of @token + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, const xmlChar *token, + void *data) { + xmlRegAtomPtr atom; + + if ((am == NULL) || (from == NULL) || (token == NULL)) + return(NULL); + atom = xmlRegNewAtom(am, XML_REGEXP_STRING); + atom->data = data; + if (atom == NULL) + return(NULL); + atom->valuep = xmlStrdup(token); + + xmlFAGenerateTransitions(am, from, to, atom); + if (to == NULL) + return(am->state); + return(to); +} + +/** + * xmlAutomataNewCountTrans: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @token: the input string associated to that transition + * @min: the minimum successive occurences of token + * @max: the maximum successive occurences of token + * @data: data associated to the transition + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a transition from the @from state to the target state + * activated by a succession of 
input of value @token and whose number + * is between @min and @max + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, const xmlChar *token, + int min, int max, void *data) { + xmlRegAtomPtr atom; + + if ((am == NULL) || (from == NULL) || (token == NULL)) + return(NULL); + if (min < 0) + return(NULL); + if ((max < min) || (max < 1)) + return(NULL); + atom = xmlRegNewAtom(am, XML_REGEXP_STRING); + if (atom == NULL) + return(NULL); + atom->valuep = xmlStrdup(token); + atom->data = data; + if (min == 0) + atom->min = 1; + else + atom->min = min; + atom->max = max; + + xmlFAGenerateTransitions(am, from, to, atom); + if (to == NULL) + to = am->state; + if (to == NULL) + return(NULL); + if (min == 0) + xmlFAGenerateEpsilonTransition(am, from, to); + return(to); +} + +/** + * xmlAutomataNewOnceTrans: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @token: the input string associated to that transition + * @min: the minimum successive occurences of token + * @max: the maximum successive occurences of token + * @data: data associated to the transition + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a transition from the @from state to the target state + * activated by a succession of input of value @token and whose number + * is between @min and @max, moreover that transistion can only be crossed + * once. 
+ * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, const xmlChar *token, + int min, int max, void *data) { + xmlRegAtomPtr atom; + int counter; + + if ((am == NULL) || (from == NULL) || (token == NULL)) + return(NULL); + if (min < 1) + return(NULL); + if ((max < min) || (max < 1)) + return(NULL); + atom = xmlRegNewAtom(am, XML_REGEXP_STRING); + if (atom == NULL) + return(NULL); + atom->valuep = xmlStrdup(token); + atom->data = data; + atom->quant = XML_REGEXP_QUANT_ONCEONLY; + if (min == 0) + atom->min = 1; + else + atom->min = min; + atom->max = max; + /* + * associate a counter to the transition. + */ + counter = xmlRegGetCounter(am); + am->counters[counter].min = 1; + am->counters[counter].max = 1; + + /* xmlFAGenerateTransitions(am, from, to, atom); */ + if (to == NULL) { + to = xmlRegNewState(am); + xmlRegStatePush(am, to); + } + xmlRegStateAddTrans(am, from, atom, to, counter, -1); + xmlRegAtomPush(am, atom); + am->state = to; + if (to == NULL) + to = am->state; + if (to == NULL) + return(NULL); + return(to); +} + +/** + * xmlAutomataNewState: + * @am: an automata + * + * Create a new disconnected state in the automata + * + * Returns the new state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewState(xmlAutomataPtr am) { + xmlAutomataStatePtr to; + + if (am == NULL) + return(NULL); + to = xmlRegNewState(am); + xmlRegStatePush(am, to); + return(to); +} + +/** + * xmlAutomataNewEpsilon: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a an epsilon transition from the @from state to the + * target state + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from, 
+ xmlAutomataStatePtr to) { + if ((am == NULL) || (from == NULL)) + return(NULL); + xmlFAGenerateEpsilonTransition(am, from, to); + if (to == NULL) + return(am->state); + return(to); +} + +/** + * xmlAutomataNewAllTrans: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @lax: allow to transition if not all all transitions have been activated + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a an ALL transition from the @from state to the + * target state. That transition is an epsilon transition allowed only when + * all transitions from the @from node have been activated. + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, int lax) { + if ((am == NULL) || (from == NULL)) + return(NULL); + xmlFAGenerateAllTransition(am, from, to, lax); + if (to == NULL) + return(am->state); + return(to); +} + +/** + * xmlAutomataNewCounter: + * @am: an automata + * @min: the minimal value on the counter + * @max: the maximal value on the counter + * + * Create a new counter + * + * Returns the counter number or -1 in case of error + */ +int +xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { + int ret; + + if (am == NULL) + return(-1); + + ret = xmlRegGetCounter(am); + if (ret < 0) + return(-1); + am->counters[ret].min = min; + am->counters[ret].max = max; + return(ret); +} + +/** + * xmlAutomataNewCountedTrans: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @counter: the counter associated to that transition + * + * If @to is NULL, this create first a new target state in the automata + * and then adds an epsilon transition from the @from state to the target state + * which will increment the counter provided + * + * Returns the target state or NULL in 
case of error + */ +xmlAutomataStatePtr +xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, int counter) { + if ((am == NULL) || (from == NULL) || (counter < 0)) + return(NULL); + xmlFAGenerateCountedEpsilonTransition(am, from, to, counter); + if (to == NULL) + return(am->state); + return(to); +} + +/** + * xmlAutomataNewCounterTrans: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @counter: the counter associated to that transition + * + * If @to is NULL, this create first a new target state in the automata + * and then adds an epsilon transition from the @from state to the target state + * which will be allowed only if the counter is within the right range. + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, int counter) { + if ((am == NULL) || (from == NULL) || (counter < 0)) + return(NULL); + xmlFAGenerateCountedTransition(am, from, to, counter); + if (to == NULL) + return(am->state); + return(to); +} + +/** + * xmlAutomataCompile: + * @am: an automata + * + * Compile the automata into a Reg Exp ready for being executed. + * The automata should be free after this point. + * + * Returns the compiled regexp or NULL in case of error + */ +xmlRegexpPtr +xmlAutomataCompile(xmlAutomataPtr am) { + xmlRegexpPtr ret; + + xmlFAEliminateEpsilonTransitions(am); + /* xmlFAComputesDeterminism(am); */ + ret = xmlRegEpxFromParse(am); + + return(ret); +} + +/** + * xmlAutomataIsDeterminist: + * @am: an automata + * + * Checks if an automata is determinist. 
+ * + * Returns 1 if true, 0 if not, and -1 in case of error + */ +int +xmlAutomataIsDeterminist(xmlAutomataPtr am) { + int ret; + + if (am == NULL) + return(-1); + + ret = xmlFAComputesDeterminism(am); + return(ret); +} +#endif /* LIBXML_AUTOMATA_ENABLED */ +#endif /* LIBXML_REGEXP_ENABLED */ diff --git a/bundle/libxml/xmlschemas.c b/bundle/libxml/xmlschemas.c new file mode 100644 index 0000000000..9bafb2dcf9 --- /dev/null +++ b/bundle/libxml/xmlschemas.c @@ -0,0 +1,5468 @@ +/* + * schemas.c : implementation of the XML Schema handling and + * schema validity checking + * + * See Copyright for the status of this software. + * + * Daniel Veillard <veillard@redhat.com> + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/hash.h> +#include <libxml/uri.h> + +#include <libxml/xmlschemas.h> +#include <libxml/schemasInternals.h> +#include <libxml/xmlschemastypes.h> +#include <libxml/xmlautomata.h> +#include <libxml/xmlregexp.h> + +#define DEBUG 1 /* very verbose output */ +#define DEBUG_CONTENT 1 +#define DEBUG_TYPE 1 +/* #define DEBUG_CONTENT_REGEXP 1 */ +/* #define DEBUG_AUTOMATA 1 */ + +#define UNBOUNDED (1 << 30) +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define XML_SCHEMAS_DEFAULT_NAMESPACE (const xmlChar *)"the default namespace" + +/* + * The XML Schemas namespaces + */ +static const xmlChar *xmlSchemaNs = (const xmlChar *) + "http://www.w3.org/2001/XMLSchema"; + +static const xmlChar *xmlSchemaInstanceNs = (const xmlChar *) + "http://www.w3.org/2001/XMLSchema-instance"; + +#define IS_SCHEMA(node, type) \ + ((node != NULL) && (node->ns != NULL) && \ + (xmlStrEqual(node->name, (const xmlChar *) type)) && \ + (xmlStrEqual(node->ns->href, xmlSchemaNs))) + +#define XML_SCHEMAS_PARSE_ERROR 1 + +struct _xmlSchemaParserCtxt { + 
void *userData; /* user specific data block */ + xmlSchemaValidityErrorFunc error; /* the callback in case of errors */ + xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */ + xmlSchemaValidError err; + + xmlSchemaPtr schema; /* The schema in use */ + xmlChar *container; /* the current element, group, ... */ + int counter; + + xmlChar *URL; + xmlDocPtr doc; + + const char *buffer; + int size; + + /* + * Used to build complex element content models + */ + xmlAutomataPtr am; + xmlAutomataStatePtr start; + xmlAutomataStatePtr end; + xmlAutomataStatePtr state; +}; + + +#define XML_SCHEMAS_ATTR_UNKNOWN 1 +#define XML_SCHEMAS_ATTR_CHECKED 2 + +typedef struct _xmlSchemaAttrState xmlSchemaAttrState; +typedef xmlSchemaAttrState *xmlSchemaAttrStatePtr; +struct _xmlSchemaAttrState { + xmlAttrPtr attr; + int state; +}; + +/** + * xmlSchemaValidCtxt: + * + * A Schemas validation context + */ + +struct _xmlSchemaValidCtxt { + void *userData; /* user specific data block */ + xmlSchemaValidityErrorFunc error; /* the callback in case of errors */ + xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */ + + xmlSchemaPtr schema; /* The schema in use */ + xmlDocPtr doc; + xmlParserInputBufferPtr input; + xmlCharEncoding enc; + xmlSAXHandlerPtr sax; + void *user_data; + + xmlDocPtr myDoc; + int err; + + xmlNodePtr node; + xmlSchemaTypePtr type; + + xmlRegExecCtxtPtr regexp; + xmlSchemaValPtr value; + + int attrNr; + int attrBase; + int attrMax; + xmlSchemaAttrStatePtr attr; +}; + + +/************************************************************************ + * * + * Some predeclarations * + * * + ************************************************************************/ +static int xmlSchemaValidateSimpleValue(xmlSchemaValidCtxtPtr ctxt, + xmlSchemaTypePtr type, + xmlChar *value); + +/************************************************************************ + * * + * Allocation functions * + * * + 
************************************************************************/ + +/** + * xmlSchemaNewSchema: + * @ctxt: a schema validation context (optional) + * + * Allocate a new Schema structure. + * + * Returns the newly allocated structure or NULL in case or error + */ +static xmlSchemaPtr +xmlSchemaNewSchema(xmlSchemaParserCtxtPtr ctxt) +{ + xmlSchemaPtr ret; + + ret = (xmlSchemaPtr) xmlMalloc(sizeof(xmlSchema)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchema)); + + return (ret); +} + +/** + * xmlSchemaNewFacet: + * @ctxt: a schema validation context (optional) + * + * Allocate a new Facet structure. + * + * Returns the newly allocated structure or NULL in case or error + */ +static xmlSchemaFacetPtr +xmlSchemaNewFacet(xmlSchemaParserCtxtPtr ctxt) +{ + xmlSchemaFacetPtr ret; + + ret = (xmlSchemaFacetPtr) xmlMalloc(sizeof(xmlSchemaFacet)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaFacet)); + + return (ret); +} + +/** + * xmlSchemaNewAnnot: + * @ctxt: a schema validation context (optional) + * @node: a node + * + * Allocate a new annotation structure. 
+ * + * Returns the newly allocated structure or NULL in case or error + */ +static xmlSchemaAnnotPtr +xmlSchemaNewAnnot(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) +{ + xmlSchemaAnnotPtr ret; + + ret = (xmlSchemaAnnotPtr) xmlMalloc(sizeof(xmlSchemaAnnot)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaAnnot)); + ret->content = node; + return (ret); +} + +/** + * xmlSchemaFreeAnnot: + * @annot: a schema type structure + * + * Deallocate a annotation structure + */ +static void +xmlSchemaFreeAnnot(xmlSchemaAnnotPtr annot) +{ + if (annot == NULL) + return; + xmlFree(annot); +} + +/** + * xmlSchemaFreeNotation: + * @schema: a schema notation structure + * + * Deallocate a Schema Notation structure. + */ +static void +xmlSchemaFreeNotation(xmlSchemaNotationPtr nota) +{ + if (nota == NULL) + return; + if (nota->name != NULL) + xmlFree((xmlChar *) nota->name); + xmlFree(nota); +} + +/** + * xmlSchemaFreeAttribute: + * @schema: a schema attribute structure + * + * Deallocate a Schema Attribute structure. + */ +static void +xmlSchemaFreeAttribute(xmlSchemaAttributePtr attr) +{ + if (attr == NULL) + return; + if (attr->name != NULL) + xmlFree((xmlChar *) attr->name); + if (attr->ref != NULL) + xmlFree((xmlChar *) attr->ref); + if (attr->refNs != NULL) + xmlFree((xmlChar *) attr->refNs); + if (attr->typeName != NULL) + xmlFree((xmlChar *) attr->typeName); + if (attr->typeNs != NULL) + xmlFree((xmlChar *) attr->typeNs); + xmlFree(attr); +} + +/** + * xmlSchemaFreeAttributeGroup: + * @schema: a schema attribute group structure + * + * Deallocate a Schema Attribute Group structure. 
+ */ +static void +xmlSchemaFreeAttributeGroup(xmlSchemaAttributeGroupPtr attr) +{ + if (attr == NULL) + return; + if (attr->name != NULL) + xmlFree((xmlChar *) attr->name); + xmlFree(attr); +} + +/** + * xmlSchemaFreeElement: + * @schema: a schema element structure + * + * Deallocate a Schema Element structure. + */ +static void +xmlSchemaFreeElement(xmlSchemaElementPtr elem) +{ + if (elem == NULL) + return; + if (elem->name != NULL) + xmlFree((xmlChar *) elem->name); + if (elem->namedType != NULL) + xmlFree((xmlChar *) elem->namedType); + if (elem->namedTypeNs != NULL) + xmlFree((xmlChar *) elem->namedTypeNs); + if (elem->ref != NULL) + xmlFree((xmlChar *) elem->ref); + if (elem->refNs != NULL) + xmlFree((xmlChar *) elem->refNs); + if (elem->annot != NULL) + xmlSchemaFreeAnnot(elem->annot); + if (elem->contModel != NULL) + xmlRegFreeRegexp(elem->contModel); + xmlFree(elem); +} + +/** + * xmlSchemaFreeFacet: + * @facet: a schema facet structure + * + * Deallocate a Schema Facet structure. + */ +static void +xmlSchemaFreeFacet(xmlSchemaFacetPtr facet) +{ + if (facet == NULL) + return; + if (facet->value != NULL) + xmlFree((xmlChar *) facet->value); + if (facet->id != NULL) + xmlFree((xmlChar *) facet->id); + if (facet->val != NULL) + xmlSchemaFreeValue(facet->val); + if (facet->regexp != NULL) + xmlRegFreeRegexp(facet->regexp); + if (facet->annot != NULL) + xmlSchemaFreeAnnot(facet->annot); + xmlFree(facet); +} + +/** + * xmlSchemaFreeType: + * @type: a schema type structure + * + * Deallocate a Schema Type structure. 
+ */ +void +xmlSchemaFreeType(xmlSchemaTypePtr type) +{ + if (type == NULL) + return; + if (type->name != NULL) + xmlFree((xmlChar *) type->name); + if (type->base != NULL) + xmlFree((xmlChar *) type->base); + if (type->baseNs != NULL) + xmlFree((xmlChar *) type->baseNs); + if (type->annot != NULL) + xmlSchemaFreeAnnot(type->annot); + if (type->facets != NULL) { + xmlSchemaFacetPtr facet, next; + + facet = type->facets; + while (facet != NULL) { + next = facet->next; + xmlSchemaFreeFacet(facet); + facet = next; + } + } + xmlFree(type); +} + +/** + * xmlSchemaFree: + * @schema: a schema structure + * + * Deallocate a Schema structure. + */ +void +xmlSchemaFree(xmlSchemaPtr schema) +{ + if (schema == NULL) + return; + + if (schema->name != NULL) + xmlFree((xmlChar *) schema->name); + if (schema->notaDecl != NULL) + xmlHashFree(schema->notaDecl, + (xmlHashDeallocator) xmlSchemaFreeNotation); + if (schema->attrDecl != NULL) + xmlHashFree(schema->attrDecl, + (xmlHashDeallocator) xmlSchemaFreeAttribute); + if (schema->attrgrpDecl != NULL) + xmlHashFree(schema->attrgrpDecl, + (xmlHashDeallocator) xmlSchemaFreeAttributeGroup); + if (schema->elemDecl != NULL) + xmlHashFree(schema->elemDecl, + (xmlHashDeallocator) xmlSchemaFreeElement); + if (schema->typeDecl != NULL) + xmlHashFree(schema->typeDecl, + (xmlHashDeallocator) xmlSchemaFreeType); + if (schema->annot != NULL) + xmlSchemaFreeAnnot(schema->annot); + if (schema->doc != NULL) + xmlFreeDoc(schema->doc); + + xmlFree(schema); +} + +/************************************************************************ + * * + * Error functions * + * * + ************************************************************************/ + +/** + * xmlSchemaErrorContext: + * @ctxt: the parsing context + * @schema: the schema being built + * @node: the node being processed + * @child: the child being processed + * + * Dump a SchemaType structure + */ +static void +xmlSchemaErrorContext(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr 
node, xmlNodePtr child) +{ + int line = 0; + const xmlChar *file = NULL; + const xmlChar *name = NULL; + const char *type = "error"; + + if ((ctxt == NULL) || (ctxt->error == NULL)) + return; + + if (child != NULL) + node = child; + + if (node != NULL) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + xmlDocPtr doc = (xmlDocPtr) node; + + file = doc->URL; + } else { + /* + * Try to find contextual informations to report + */ + if (node->type == XML_ELEMENT_NODE) { + line = (int) node->content; + } else if ((node->prev != NULL) && + (node->prev->type == XML_ELEMENT_NODE)) { + line = (int) node->prev->content; + } else if ((node->parent != NULL) && + (node->parent->type == XML_ELEMENT_NODE)) { + line = (int) node->parent->content; + } + if ((node->doc != NULL) && (node->doc->URL != NULL)) + file = node->doc->URL; + if (node->name != NULL) + name = node->name; + } + } + + if (ctxt != NULL) + type = "compilation error"; + else if (schema != NULL) + type = "runtime error"; + + if ((file != NULL) && (line != 0) && (name != NULL)) + ctxt->error(ctxt->userData, "%s: file %s line %d element %s\n", + type, file, line, name); + else if ((file != NULL) && (name != NULL)) + ctxt->error(ctxt->userData, "%s: file %s element %s\n", + type, file, name); + else if ((file != NULL) && (line != 0)) + ctxt->error(ctxt->userData, "%s: file %s line %d\n", type, file, line); + else if (file != NULL) + ctxt->error(ctxt->userData, "%s: file %s\n", type, file); + else if (name != NULL) + ctxt->error(ctxt->userData, "%s: element %s\n", type, name); + else + ctxt->error(ctxt->userData, "%s\n", type); +} + +/************************************************************************ + * * + * Debug functions * + * * + ************************************************************************/ + +/** + * xmlSchemaElementDump: + * @elem: an element + * @output: the file output + * + * Dump the element + */ +static void +xmlSchemaElementDump(xmlSchemaElementPtr 
elem, FILE * output, + const xmlChar *name ATTRIBUTE_UNUSED, + const xmlChar *context ATTRIBUTE_UNUSED, + const xmlChar *namespace ATTRIBUTE_UNUSED) +{ + if (elem == NULL) + return; + + fprintf(output, "Element "); + if (elem->flags & XML_SCHEMAS_ELEM_TOPLEVEL) + fprintf(output, "toplevel "); + fprintf(output, ": %s ", elem->name); + if (namespace != NULL) + fprintf(output, "namespace '%s' ", namespace); + + if (elem->flags & XML_SCHEMAS_ELEM_NILLABLE) + fprintf(output, "nillable "); + if (elem->flags & XML_SCHEMAS_ELEM_GLOBAL) + fprintf(output, "global "); + if (elem->flags & XML_SCHEMAS_ELEM_DEFAULT) + fprintf(output, "default "); + if (elem->flags & XML_SCHEMAS_ELEM_FIXED) + fprintf(output, "fixed "); + if (elem->flags & XML_SCHEMAS_ELEM_ABSTRACT) + fprintf(output, "abstract "); + if (elem->flags & XML_SCHEMAS_ELEM_REF) + fprintf(output, "ref '%s' ", elem->ref); + if (elem->id != NULL) + fprintf(output, "id '%s' ", elem->id); + fprintf(output, "\n"); + if ((elem->minOccurs != 1) || (elem->maxOccurs != 1)) { + fprintf(output, " "); + if (elem->minOccurs != 1) + fprintf(output, "min: %d ", elem->minOccurs); + if (elem->maxOccurs >= UNBOUNDED) + fprintf(output, "max: unbounded\n"); + else if (elem->maxOccurs != 1) + fprintf(output, "max: %d\n", elem->maxOccurs); + else + fprintf(output, "\n"); + } + if (elem->namedType != NULL) { + fprintf(output, " type: %s", elem->namedType); + if (elem->namedTypeNs != NULL) + fprintf(output, " ns %s\n", elem->namedTypeNs); + else + fprintf(output, "\n"); + } + if (elem->substGroup != NULL) { + fprintf(output, " substitutionGroup: %s", elem->substGroup); + if (elem->substGroupNs != NULL) + fprintf(output, " ns %s\n", elem->substGroupNs); + else + fprintf(output, "\n"); + } + if (elem->value != NULL) + fprintf(output, " default: %s", elem->value); +} + +/** + * xmlSchemaAnnotDump: + * @output: the file output + * @annot: a annotation + * + * Dump the annotation + */ +static void +xmlSchemaAnnotDump(FILE * output, xmlSchemaAnnotPtr 
annot) +{ + xmlChar *content; + + if (annot == NULL) + return; + + content = xmlNodeGetContent(annot->content); + if (content != NULL) { + fprintf(output, " Annot: %s\n", content); + xmlFree(content); + } else + fprintf(output, " Annot: empty\n"); +} + +/** + * xmlSchemaTypeDump: + * @output: the file output + * @type: a type structure + * + * Dump a SchemaType structure + */ +static void +xmlSchemaTypeDump(xmlSchemaTypePtr type, FILE * output) +{ + if (type == NULL) { + fprintf(output, "Type: NULL\n"); + return; + } + fprintf(output, "Type: "); + if (type->name != NULL) + fprintf(output, "%s, ", type->name); + else + fprintf(output, "no name"); + switch (type->type) { + case XML_SCHEMA_TYPE_BASIC: + fprintf(output, "basic "); + break; + case XML_SCHEMA_TYPE_SIMPLE: + fprintf(output, "simple "); + break; + case XML_SCHEMA_TYPE_COMPLEX: + fprintf(output, "complex "); + break; + case XML_SCHEMA_TYPE_SEQUENCE: + fprintf(output, "sequence "); + break; + case XML_SCHEMA_TYPE_CHOICE: + fprintf(output, "choice "); + break; + case XML_SCHEMA_TYPE_ALL: + fprintf(output, "all "); + break; + case XML_SCHEMA_TYPE_UR: + fprintf(output, "ur "); + break; + case XML_SCHEMA_TYPE_RESTRICTION: + fprintf(output, "restriction "); + break; + case XML_SCHEMA_TYPE_EXTENSION: + fprintf(output, "extension "); + break; + default: + fprintf(output, "unknowntype%d ", type->type); + break; + } + if (type->base != NULL) { + fprintf(output, "base %s, ", type->base); + } + switch (type->contentType) { + case XML_SCHEMA_CONTENT_UNKNOWN: + fprintf(output, "unknown "); + break; + case XML_SCHEMA_CONTENT_EMPTY: + fprintf(output, "empty "); + break; + case XML_SCHEMA_CONTENT_ELEMENTS: + fprintf(output, "element "); + break; + case XML_SCHEMA_CONTENT_MIXED: + fprintf(output, "mixed "); + break; + case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS: + fprintf(output, "mixed_or_elems "); + break; + case XML_SCHEMA_CONTENT_BASIC: + fprintf(output, "basic "); + break; + case XML_SCHEMA_CONTENT_SIMPLE: + 
fprintf(output, "simple "); + break; + case XML_SCHEMA_CONTENT_ANY: + fprintf(output, "any "); + break; + } + fprintf(output, "\n"); + if ((type->minOccurs != 1) || (type->maxOccurs != 1)) { + fprintf(output, " "); + if (type->minOccurs != 1) + fprintf(output, "min: %d ", type->minOccurs); + if (type->maxOccurs >= UNBOUNDED) + fprintf(output, "max: unbounded\n"); + else if (type->maxOccurs != 1) + fprintf(output, "max: %d\n", type->maxOccurs); + else + fprintf(output, "\n"); + } + if (type->annot != NULL) + xmlSchemaAnnotDump(output, type->annot); + if (type->subtypes != NULL) { + xmlSchemaTypePtr sub = type->subtypes; + + fprintf(output, " subtypes: "); + while (sub != NULL) { + fprintf(output, "%s ", sub->name); + sub = sub->next; + } + fprintf(output, "\n"); + } + +} + +/** + * xmlSchemaDump: + * @output: the file output + * @schema: a schema structure + * + * Dump a Schema structure. + */ +void +xmlSchemaDump(FILE * output, xmlSchemaPtr schema) +{ + if (schema == NULL) { + fprintf(output, "Schemas: NULL\n"); + return; + } + fprintf(output, "Schemas: "); + if (schema->name != NULL) + fprintf(output, "%s, ", schema->name); + else + fprintf(output, "no name, "); + if (schema->targetNamespace != NULL) + fprintf(output, "%s", schema->targetNamespace); + else + fprintf(output, "no target namespace"); + fprintf(output, "\n"); + if (schema->annot != NULL) + xmlSchemaAnnotDump(output, schema->annot); + + xmlHashScan(schema->typeDecl, (xmlHashScanner) xmlSchemaTypeDump, + output); + xmlHashScanFull(schema->elemDecl, + (xmlHashScannerFull) xmlSchemaElementDump, output); +} + +/************************************************************************ + * * + * Parsing functions * + * * + ************************************************************************/ + +/** + * xmlSchemaGetType: + * @schema: the schemas context + * @name: the type name + * @ns: the type namespace + * + * Lookup a type in the schemas or the predefined types + * + * Returns 1 if the string is NULL 
or made of blanks chars, 0 otherwise
 * (NOTE: the "Returns" sentence above is a copy/paste leftover; the code
 * below returns the matching type definition, or NULL when @name is NULL
 * or no type is found.)
 */
static xmlSchemaTypePtr
xmlSchemaGetType(xmlSchemaPtr schema, const xmlChar * name,
                 const xmlChar * namespace) {
    xmlSchemaTypePtr ret;

    if (name == NULL)
        return(NULL);
    /* types declared by the schema take precedence over predefined ones */
    if (schema != NULL) {
        ret = xmlHashLookup2(schema->typeDecl, name, namespace);
        if (ret != NULL)
            return(ret);
    }
    ret = xmlSchemaGetPredefinedType(name, namespace);
#ifdef DEBUG
    if (ret == NULL) {
        if (namespace == NULL)
            fprintf(stderr, "Unable to lookup type %s", name);
        else
            fprintf(stderr, "Unable to lookup type %s:%s", name, namespace);
    }
#endif
    return(ret);
}

/************************************************************************
 *                                                                      *
 *                      Parsing functions                               *
 *                                                                      *
 ************************************************************************/

/* true when @n is a text node whose content is NULL or only blanks */
#define IS_BLANK_NODE(n) \
    (((n)->type == XML_TEXT_NODE) && (xmlSchemaIsBlank((n)->content)))

/**
 * xmlSchemaIsBlank:
 * @str: a string
 *
 * Check if a string is ignorable
 *
 * Returns 1 if the string is NULL or made of blanks chars, 0 otherwise
 */
static int
xmlSchemaIsBlank(xmlChar *str) {
    if (str == NULL)
        return(1);
    while (*str != 0) {
        if (!(IS_BLANK(*str))) return(0);
        str++;
    }
    return(1);
}

/**
 * xmlSchemaAddNotation:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @name: the item name
 *
 * Add an XML schema Notation declaration: allocate a zeroed notation
 * with a copy of @name and register it in the schema notation table
 * (created on first use) under @name and the schema target namespace.
 * *WARNING* this interface is highly subject to change
 *
 * Returns the new structure or NULL in case of error
 */
static xmlSchemaNotationPtr
xmlSchemaAddNotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                     const xmlChar * name)
{
    xmlSchemaNotationPtr ret = NULL;
    int val;

    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
        return (NULL);

    if (schema->notaDecl == NULL)
        schema->notaDecl = xmlHashCreate(10);
    if (schema->notaDecl == NULL)
        return (NULL);

    ret = (xmlSchemaNotationPtr) xmlMalloc(sizeof(xmlSchemaNotation));
    if (ret == NULL) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Out of memory\n");
        return (NULL);
    }
    memset(ret, 0, sizeof(xmlSchemaNotation));
    ret->name = xmlStrdup(name);
    val = xmlHashAddEntry2(schema->notaDecl, name, schema->targetNamespace,
                           ret);
    if (val != 0) {
        /* insertion failed: report it and release the new item */
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Could not add notation %s\n",
                        name);
        xmlFree((char *) ret->name);
        xmlFree(ret);
        return (NULL);
    }
    return (ret);
}


/**
 * xmlSchemaAddAttribute:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @name: the item name
 *
 * Add an XML schema Attribute declaration. The entry is registered in
 * the schema attribute table under @name, the schema target namespace
 * and the current ctxt->container (there is no separate container
 * parameter).
 * *WARNING* this interface is highly subject to change
 *
 * Returns the new structure or NULL in case of error
 */
static xmlSchemaAttributePtr
xmlSchemaAddAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                      const xmlChar * name)
{
    xmlSchemaAttributePtr ret = NULL;
    int val;

    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
        return (NULL);

    if (schema->attrDecl == NULL)
        schema->attrDecl = xmlHashCreate(10);
    if (schema->attrDecl == NULL)
        return (NULL);

    ret = (xmlSchemaAttributePtr) xmlMalloc(sizeof(xmlSchemaAttribute));
    if (ret == NULL) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Out of memory\n");
        return (NULL);
    }
    memset(ret, 0, sizeof(xmlSchemaAttribute));
    ret->name = xmlStrdup(name);
    val = xmlHashAddEntry3(schema->attrDecl, name,
                           schema->targetNamespace, ctxt->container, ret);
    if (val != 0) {
        /* insertion failed: report it and release the new item */
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Could not add attribute %s\n",
                        name);
        xmlFree((char *) ret->name);
        xmlFree(ret);
        return (NULL);
    }
    return (ret);
}

/**
 * xmlSchemaAddAttributeGroup:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @name: the item name
 *
 * Add an XML
schema Attribute Group declaration
 *
 * Returns the new structure or NULL in case of error
 */
static xmlSchemaAttributeGroupPtr
xmlSchemaAddAttributeGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                           const xmlChar * name)
{
    xmlSchemaAttributeGroupPtr ret = NULL;
    int val;

    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
        return (NULL);

    if (schema->attrgrpDecl == NULL)
        schema->attrgrpDecl = xmlHashCreate(10);
    if (schema->attrgrpDecl == NULL)
        return (NULL);

    ret = (xmlSchemaAttributeGroupPtr) xmlMalloc(sizeof(xmlSchemaAttributeGroup));
    if (ret == NULL) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Out of memory\n");
        return (NULL);
    }
    memset(ret, 0, sizeof(xmlSchemaAttributeGroup));
    ret->name = xmlStrdup(name);
    /* keyed by name, schema target namespace and current container */
    val = xmlHashAddEntry3(schema->attrgrpDecl, name,
                           schema->targetNamespace, ctxt->container, ret);
    if (val != 0) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Could not add attribute group %s\n",
                        name);
        xmlFree((char *) ret->name);
        xmlFree(ret);
        return (NULL);
    }
    return (ret);
}

/**
 * xmlSchemaAddElement:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @name: the type name
 * @namespace: the type namespace
 *
 * Add an XML schema Element declaration. If the first insertion in the
 * element table fails, a second one is attempted with a generated
 * "privatieelem<N>" key built from ctxt->counter.
 * *WARNING* this interface is highly subject to change
 *
 * Returns the new structure or NULL in case of error
 */
static xmlSchemaElementPtr
xmlSchemaAddElement(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                    const xmlChar * name, const xmlChar * namespace)
{
    xmlSchemaElementPtr ret = NULL;
    int val;

    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
        return (NULL);

    if (schema->elemDecl == NULL)
        schema->elemDecl = xmlHashCreate(10);
    if (schema->elemDecl == NULL)
        return (NULL);

    ret = (xmlSchemaElementPtr) xmlMalloc(sizeof(xmlSchemaElement));
    if (ret == NULL) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Out of memory\n");
        return (NULL);
    }
    memset(ret, 0, sizeof(xmlSchemaElement));
    ret->name = xmlStrdup(name);
    val = xmlHashAddEntry3(schema->elemDecl, name,
                           namespace, ctxt->container, ret);
    if (val != 0) {
        char buf[100];

        /*
         * NOTE(review): the retry puts the generated key in the second
         * slot and the namespace in the third, unlike the first attempt
         * (namespace, container) -- confirm this ordering is intended.
         */
        snprintf(buf, 99, "privatieelem%d", ctxt->counter++ + 1);
        val = xmlHashAddEntry3(schema->elemDecl, name, (xmlChar *) buf,
                               namespace, ret);
        if (val != 0) {
            if ((ctxt != NULL) && (ctxt->error != NULL))
                ctxt->error(ctxt->userData, "Could not add element %s\n",
                            name);
            xmlFree((char *) ret->name);
            xmlFree(ret);
            return (NULL);
        }
    }
    return (ret);
}

/**
 * xmlSchemaAddType:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @name: the item name
 *
 * Add an XML schema Simple Type definition; the new type starts with
 * minOccurs and maxOccurs both set to 1.
 * *WARNING* this interface is highly subject to change
 *
 * Returns the new structure or NULL in case of error
 */
static xmlSchemaTypePtr
xmlSchemaAddType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                 const xmlChar * name)
{
    xmlSchemaTypePtr ret = NULL;
    int val;

    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
        return (NULL);

    if (schema->typeDecl == NULL)
        schema->typeDecl = xmlHashCreate(10);
    if (schema->typeDecl == NULL)
        return (NULL);

    ret = (xmlSchemaTypePtr) xmlMalloc(sizeof(xmlSchemaType));
    if (ret == NULL) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Out of memory\n");
        return (NULL);
    }
    memset(ret, 0, sizeof(xmlSchemaType));
    ret->name = xmlStrdup(name);
    val = xmlHashAddEntry2(schema->typeDecl, name, schema->targetNamespace,
                           ret);
    if (val != 0) {
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData, "Could not add type %s\n", name);
        xmlFree((char *) ret->name);
        xmlFree(ret);
        return (NULL);
    }
    ret->minOccurs = 1;
    ret->maxOccurs = 1;

    return (ret);
}
+/************************************************************************ + * * + * Utilities for parsing * + * * + ************************************************************************/ + +/** + * xmlGetQNameProp: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * @name: the attribute name + * @namespace: the result namespace if any + * + * Extract a QName Attribute value + * + * Returns the NCName or NULL if not found, and also update @namespace + * with the namespace URI + */ +static xmlChar * +xmlGetQNameProp(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, + const char *name, + xmlChar **namespace) { + xmlChar *val, *ret, *prefix; + xmlNsPtr ns; + + + if (namespace != NULL) + *namespace = NULL; + val = xmlGetProp(node, (const xmlChar *) name); + if (val == NULL) + return(NULL); + + ret = xmlSplitQName2(val, &prefix); + if (ret == NULL) + return(val); + xmlFree(val); + + ns = xmlSearchNs(node->doc, node, prefix); + if (ns == NULL) { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Attribute %s: the QName prefix %s is undefined\n", + name, prefix); + } else { + *namespace = xmlStrdup(ns->href); + } + xmlFree(prefix); + return(ret); +} + +/** + * xmlGetMaxOccurs: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * + * Get the maxOccurs property + * + * Returns the default if not found, or the value + */ +static int +xmlGetMaxOccurs(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) { + xmlChar *val, *cur; + int ret = 0; + + val = xmlGetProp(node, (const xmlChar *) "maxOccurs"); + if (val == NULL) + return(1); + + if (xmlStrEqual(val, (const xmlChar *) "unbounded")) { + xmlFree(val); + return(UNBOUNDED); /* encoding it with -1 might be another option */ + } + + cur = val; + while (IS_BLANK(*cur)) cur++; + while ((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10 + (*cur - '0'); + cur++; + } + 
while (IS_BLANK(*cur)) cur++; + if (*cur != 0) { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "invalid value for minOccurs: %s\n", + val); + xmlFree(val); + return(1); + } + xmlFree(val); + return(ret); +} + +/** + * xmlGetMinOccurs: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * + * Get the minOccurs property + * + * Returns the default if not found, or the value + */ +static int +xmlGetMinOccurs(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) { + xmlChar *val, *cur; + int ret = 0; + + val = xmlGetProp(node, (const xmlChar *) "minOccurs"); + if (val == NULL) + return(1); + + cur = val; + while (IS_BLANK(*cur)) cur++; + while ((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10 + (*cur - '0'); + cur++; + } + while (IS_BLANK(*cur)) cur++; + if (*cur != 0) { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "invalid value for minOccurs: %s\n", + val); + xmlFree(val); + return(1); + } + xmlFree(val); + return(ret); +} + +/** + * xmlGetBooleanProp: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * @name: the attribute name + * @def: the default value + * + * Get is a bolean property is set + * + * Returns the default if not found, 0 if found to be false, + * 1 if found to be true + */ +static int +xmlGetBooleanProp(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, + const char *name, int def) { + xmlChar *val; + + val = xmlGetProp(node, (const xmlChar *) name); + if (val == NULL) + return(def); + + if (xmlStrEqual(val, BAD_CAST"true")) + def = 1; + else if (xmlStrEqual(val, BAD_CAST"false")) + def = 0; + else { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Attribute %s: the value %s is not boolean\n", + name, val); + } + xmlFree(val); + 
return(def); +} + +/************************************************************************ + * * + * Shema extraction from an Infoset * + * * + ************************************************************************/ +static xmlSchemaTypePtr xmlSchemaParseSimpleType(xmlSchemaParserCtxtPtr + ctxt, xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseComplexType(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseRestriction(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node, + int simple); +static xmlSchemaTypePtr xmlSchemaParseSequence(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaAttributePtr xmlSchemaParseAttribute(xmlSchemaParserCtxtPtr + ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaAttributeGroupPtr +xmlSchemaParseAttributeGroup(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseChoice(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseList(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaAttributePtr +xmlSchemaParseAnyAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node); + +/** + * xmlSchemaParseAttrDecls: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * @type: the hosting type + * + * parse a XML schema attrDecls declaration corresponding to + * <!ENTITY % attrDecls + * '((%attribute;| %attributeGroup;)*,(%anyAttribute;)?)'> + */ +static xmlNodePtr +xmlSchemaParseAttrDecls(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr child, xmlSchemaTypePtr type) +{ + xmlSchemaAttributePtr lastattr, attr; + + 
lastattr = NULL; + while ((IS_SCHEMA(child, "attribute")) || + (IS_SCHEMA(child, "attributeGroup"))) { + attr = NULL; + if (IS_SCHEMA(child, "attribute")) { + attr = xmlSchemaParseAttribute(ctxt, schema, child); + } else if (IS_SCHEMA(child, "attributeGroup")) { + attr = (xmlSchemaAttributePtr) + xmlSchemaParseAttributeGroup(ctxt, schema, child); + } + if (attr != NULL) { + if (lastattr == NULL) { + type->attributes = attr; + lastattr = attr + ; + } else { + lastattr->next = attr; + lastattr = attr; + } + } + child = child->next; + } + if (IS_SCHEMA(child, "anyAttribute")) { + attr = xmlSchemaParseAnyAttribute(ctxt, schema, child); + if (attr != NULL) { + if (lastattr == NULL) { + type->attributes = attr; + lastattr = attr + ; + } else { + lastattr->next = attr; + lastattr = attr; + } + } + child = child->next; + } + return(child); +} + +/** + * xmlSchemaParseAnnotation: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Attrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. 
+ */ +static xmlSchemaAnnotPtr +xmlSchemaParseAnnotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaAnnotPtr ret; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + ret = xmlSchemaNewAnnot(ctxt, node); + + return (ret); +} + +/** + * xmlSchemaParseFacet: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Facet declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new type structure or NULL in case of error + */ +static xmlSchemaFacetPtr +xmlSchemaParseFacet(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaFacetPtr facet; + xmlNodePtr child = NULL; + xmlChar *value; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + facet = xmlSchemaNewFacet(ctxt); + if (facet == NULL) + return (NULL); + facet->node = node; + value = xmlGetProp(node, (const xmlChar *) "value"); + if (value == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Facet %s has no value\n", node->name); + xmlSchemaFreeFacet(facet); + return (NULL); + } + if (IS_SCHEMA(node, "minInclusive")) { + facet->type = XML_SCHEMA_FACET_MININCLUSIVE; + } else if (IS_SCHEMA(node, "minExclusive")) { + facet->type = XML_SCHEMA_FACET_MINEXCLUSIVE; + } else if (IS_SCHEMA(node, "maxInclusive")) { + facet->type = XML_SCHEMA_FACET_MAXINCLUSIVE; + } else if (IS_SCHEMA(node, "maxExclusive")) { + facet->type = XML_SCHEMA_FACET_MAXEXCLUSIVE; + } else if (IS_SCHEMA(node, "totalDigits")) { + facet->type = XML_SCHEMA_FACET_TOTALDIGITS; + } else if (IS_SCHEMA(node, "fractionDigits")) { + facet->type = XML_SCHEMA_FACET_FRACTIONDIGITS; + } else if (IS_SCHEMA(node, "pattern")) { + facet->type = XML_SCHEMA_FACET_PATTERN; + } else if (IS_SCHEMA(node, "enumeration")) { + 
facet->type = XML_SCHEMA_FACET_ENUMERATION; + } else if (IS_SCHEMA(node, "whiteSpace")) { + facet->type = XML_SCHEMA_FACET_WHITESPACE; + } else if (IS_SCHEMA(node, "length")) { + facet->type = XML_SCHEMA_FACET_LENGTH; + } else if (IS_SCHEMA(node, "maxLength")) { + facet->type = XML_SCHEMA_FACET_MAXLENGTH; + } else if (IS_SCHEMA(node, "minLength")) { + facet->type = XML_SCHEMA_FACET_MINLENGTH; + } else { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Unknown facet type %s\n", node->name); + xmlSchemaFreeFacet(facet); + return(NULL); + } + facet->id = xmlGetProp(node, (const xmlChar *) "id"); + facet->value = value; + child = node->children; + + if (IS_SCHEMA(child, "annotation")) { + facet->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Facet %s has unexpected child content\n", + node->name); + } + return (facet); +} + +/** + * xmlSchemaParseAny: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Any declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new type structure or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseAny(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + snprintf((char *)name, 30, "any %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_ANY; + child = node->children; + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = 
xmlGetMaxOccurs(ctxt, node); + + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Sequence %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseNotation: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Notation declaration + * + * Returns the new structure or NULL in case of error + */ +static xmlSchemaNotationPtr +xmlSchemaParseNotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *name; + xmlSchemaNotationPtr ret; + xmlNodePtr child = NULL; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Notation has no name\n"); + return (NULL); + } + ret = xmlSchemaAddNotation(ctxt, schema, name); + if (ret == NULL) { + xmlFree(name); + return (NULL); + } + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "notation %s has unexpected content\n", + name); + } + + return (ret); +} + +/** + * xmlSchemaParseAnyAttribute: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema AnyAttrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns an 
attribute def structure or NULL
 */
static xmlSchemaAttributePtr
xmlSchemaParseAnyAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                           xmlNodePtr node)
{
    xmlChar *processContents;
    xmlSchemaAttributePtr ret;
    xmlNodePtr child = NULL;
    char name[100];

    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
        return (NULL);

    /* anyAttribute has no name of its own, register it under a generated one */
    snprintf(name, 99, "anyattr %d", ctxt->counter++ + 1);
    ret = xmlSchemaAddAttribute(ctxt, schema, (xmlChar *)name);
    if (ret == NULL) {
        return (NULL);
    }
    ret->id = xmlGetProp(node, (const xmlChar *) "id");
    /* processContents: a missing or unrecognized value falls back to strict */
    processContents = xmlGetProp(node, (const xmlChar *) "processContents");
    if ((processContents == NULL) ||
        (xmlStrEqual(processContents, (const xmlChar *)"strict"))) {
        ret->occurs = XML_SCHEMAS_ANYATTR_STRICT;
    } else if (xmlStrEqual(processContents, (const xmlChar *)"skip")) {
        ret->occurs = XML_SCHEMAS_ANYATTR_SKIP;
    } else if (xmlStrEqual(processContents, (const xmlChar *)"lax")) {
        ret->occurs = XML_SCHEMAS_ANYATTR_LAX;
    } else {
        xmlSchemaErrorContext(ctxt, schema, node, child);
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData,
                        "anyAttribute has unexpected content for processContents: %s\n",
                        processContents);
        ret->occurs = XML_SCHEMAS_ANYATTR_STRICT;
    }
    if (processContents != NULL)
        xmlFree(processContents);

    /* only an optional annotation child is accepted */
    child = node->children;
    if (IS_SCHEMA(child, "annotation")) {
        ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
        child = child->next;
    }
    if (child != NULL) {
        xmlSchemaErrorContext(ctxt, schema, node, child);
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData,
                        "anyAttribute %s has unexpected content\n",
                        name);
    }

    return (ret);
}


/**
 * xmlSchemaParseAttribute:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @node: a subtree containing XML Schema informations
 *
 * parse a XML schema Attribute declaration
 * *WARNING* this interface is highly subject to
change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaAttributePtr +xmlSchemaParseAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *name, *refNs = NULL, *ref = NULL; + xmlSchemaAttributePtr ret; + xmlNodePtr child = NULL; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Attribute has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anonattr%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + ret = xmlSchemaAddAttribute(ctxt, schema, name); + if (ret == NULL) { + xmlFree(name); + if (ref != NULL) + xmlFree(ref); + return (NULL); + } + xmlFree(name); + ret->ref = ref; + ret->refNs = refNs; + ret->typeName = xmlGetQNameProp(ctxt, node, "type", &(ret->typeNs)); + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (IS_SCHEMA(child, "simpleType")) { + ret->subtypes = xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "attribute %s has unexpected content\n", + name); + } + + return (ret); +} + +/** + * xmlSchemaParseAttributeGroup: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Attribute Group declaration + * *WARNING* this interface is highly subject to change + * + * Returns the attribute group or NULL in case of 
error. + */ +static xmlSchemaAttributeGroupPtr +xmlSchemaParseAttributeGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *name, *refNs = NULL, *ref = NULL; + xmlSchemaAttributeGroupPtr ret; + xmlSchemaAttributePtr last = NULL, attr; + xmlNodePtr child = NULL; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + oldcontainer = ctxt->container; + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "AttributeGroup has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anonattrgroup%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + ret = xmlSchemaAddAttributeGroup(ctxt, schema, name); + if (ret == NULL) { + xmlFree(name); + if (ref != NULL) + xmlFree(ref); + return (NULL); + } + ret->ref = ref; + ret->refNs = refNs; + ret->type = XML_SCHEMA_TYPE_ATTRIBUTEGROUP; + child = node->children; + ctxt->container = name; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "attribute")) || + (IS_SCHEMA(child, "attributeGroup"))) { + attr = NULL; + if (IS_SCHEMA(child, "attribute")) { + attr = xmlSchemaParseAttribute(ctxt, schema, child); + } else if (IS_SCHEMA(child, "attributeGroup")) { + attr = (xmlSchemaAttributePtr) + xmlSchemaParseAttributeGroup(ctxt, schema, child); + } + if (attr != NULL) { + if (last == NULL) { + ret->attributes = attr; + last = attr; + } else { + last->next = attr; + last = attr; + } + } + child = child->next; + } + if (IS_SCHEMA(child, "anyAttribute")) { + TODO + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && 
(ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "attribute group %s has unexpected content\n", + name); + } + + ctxt->container = oldcontainer; + return (ret); +} + +/** + * xmlSchemaParseElement: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Element declaration + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaElementPtr +xmlSchemaParseElement(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node, int toplevel) +{ + xmlChar *name, *refNs = NULL, *ref = NULL, *namespace, *fixed; + xmlSchemaElementPtr ret; + xmlNodePtr child = NULL; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + oldcontainer = ctxt->container; + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Element has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anonelem%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + namespace = xmlGetProp(node, (const xmlChar *) "targetNamespace"); + if (namespace == NULL) + ret = + xmlSchemaAddElement(ctxt, schema, name, + schema->targetNamespace); + else + ret = xmlSchemaAddElement(ctxt, schema, name, namespace); + if (namespace != NULL) + xmlFree(namespace); + if (ret == NULL) { + xmlFree(name); + if (ref != NULL) + xmlFree(ref); + return (NULL); + } + ret->type = XML_SCHEMA_TYPE_ELEMENT; + ret->ref = ref; + ret->refNs = refNs; + if (ref != NULL) + ret->flags |= XML_SCHEMAS_ELEM_REF; + if (toplevel) + ret->flags |= XML_SCHEMAS_ELEM_TOPLEVEL; + if (xmlGetBooleanProp(ctxt, node, 
"nillable", 0)) + ret->flags |= XML_SCHEMAS_ELEM_NILLABLE; + if (xmlGetBooleanProp(ctxt, node, "abstract", 0)) + ret->flags |= XML_SCHEMAS_ELEM_NILLABLE; + ctxt->container = name; + + ret->id = xmlGetProp(node, BAD_CAST "id"); + ret->namedType = xmlGetQNameProp(ctxt, node, "type", &(ret->namedTypeNs)); + ret->substGroup = xmlGetQNameProp(ctxt, node, "substitutionGroup", + &(ret->substGroupNs)); + fixed = xmlGetProp(node, BAD_CAST "fixed"); + ret->minOccurs = xmlGetMinOccurs(ctxt, node); + ret->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + ret->value = xmlGetProp(node, BAD_CAST "default"); + if ((ret->value != NULL) && (fixed != NULL)) { + xmlSchemaErrorContext(ctxt, schema, node, child); + ctxt->error(ctxt->userData, + "Element %s has both default and fixed\n", + ret->name); + xmlFree(fixed); + } else if (fixed != NULL) { + ret->flags |= XML_SCHEMAS_ELEM_FIXED; + ret->value = fixed; + } + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (IS_SCHEMA(child, "complexType")) { + ret->subtypes = xmlSchemaParseComplexType(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "simpleType")) { + ret->subtypes = xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "unique")) || + (IS_SCHEMA(child, "key")) || + (IS_SCHEMA(child, "keyref"))) { + TODO + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "element %s has unexpected content\n", + name); + } + + ctxt->container = oldcontainer; + xmlFree(name); + return (ret); +} + +/** + * xmlSchemaParseUnion: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Union definition + * *WARNING* this interface is highly subject to 
change
 *
 * The union is registered under a generated "union <N>" name; the raw
 * "memberTypes" attribute is stored in the ref field and inline
 * simpleType children are chained as subtypes.
 *
 * Returns the new type structure or NULL in case of error
 */
static xmlSchemaTypePtr
xmlSchemaParseUnion(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                    xmlNodePtr node)
{
    xmlSchemaTypePtr type, subtype, last = NULL;
    xmlNodePtr child = NULL;
    xmlChar name[30];

    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
        return (NULL);


    snprintf((char *)name, 30, "union %d", ctxt->counter++ + 1);
    type = xmlSchemaAddType(ctxt, schema, name);
    if (type == NULL)
        return (NULL);
    type->node = node;
    /*
     * NOTE(review): a union is tagged as XML_SCHEMA_TYPE_LIST here, same
     * as in xmlSchemaParseList -- possibly a copy/paste slip, confirm.
     */
    type->type = XML_SCHEMA_TYPE_LIST;
    type->id = xmlGetProp(node, BAD_CAST "id");
    type->ref = xmlGetProp(node, BAD_CAST "memberTypes");

    child = node->children;
    if (IS_SCHEMA(child, "annotation")) {
        type->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
        child = child->next;
    }
    while (IS_SCHEMA(child, "simpleType")) {
        subtype = (xmlSchemaTypePtr)
            xmlSchemaParseSimpleType(ctxt, schema, child);
        /* append to the subtypes list, keeping it NULL terminated */
        if (subtype != NULL) {
            if (last == NULL) {
                type->subtypes = subtype;
                last = subtype;
            } else {
                last->next = subtype;
                last = subtype;
            }
            last->next = NULL;
        }
        child = child->next;
    }
    if (child != NULL) {
        xmlSchemaErrorContext(ctxt, schema, node, child);
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData,
                        "Union %s has unexpected content\n",
                        type->name);
    }
    return (type);
}

/**
 * xmlSchemaParseList:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @node: a subtree containing XML Schema informations
 *
 * parse a XML schema List definition
 * *WARNING* this interface is highly subject to change
 *
 * Returns the new type structure or NULL in case of error
 */
static xmlSchemaTypePtr
xmlSchemaParseList(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
                   xmlNodePtr node)
{
    xmlSchemaTypePtr type, subtype;
    xmlNodePtr child = NULL;
    xmlChar name[30];

    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
        return (NULL);

    /* lists are anonymous: register them under a generated name */
    snprintf((char *)name, 30, "list %d", ctxt->counter++ + 1);
    type = xmlSchemaAddType(ctxt, schema, name);
    if (type == NULL)
        return (NULL);
    type->node = node;
    type->type = XML_SCHEMA_TYPE_LIST;
    type->id = xmlGetProp(node, BAD_CAST "id");
    type->ref = xmlGetQNameProp(ctxt, node, "ref", &(type->refNs));

    child = node->children;
    if (IS_SCHEMA(child, "annotation")) {
        type->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
        child = child->next;
    }
    /* at most one inline simpleType is accepted */
    subtype = NULL;
    if (IS_SCHEMA(child, "simpleType")) {
        subtype = (xmlSchemaTypePtr)
            xmlSchemaParseSimpleType(ctxt, schema, child);
        child = child->next;
        type->subtypes = subtype;
    }
    if (child != NULL) {
        xmlSchemaErrorContext(ctxt, schema, node, child);
        if ((ctxt != NULL) && (ctxt->error != NULL))
            ctxt->error(ctxt->userData,
                        "List %s has unexpected content\n",
                        type->name);
    }
    return (type);
}
/**
 * xmlSchemaParseSimpleType:
 * @ctxt: a schema validation context
 * @schema: the schema being built
 * @node: a subtree containing XML Schema informations
 *
 * parse a XML schema Simple Type definition
 * *WARNING* this interface is highly subject to change
 *
 * Returns the new type structure or NULL in case of error
+ */ +static xmlSchemaTypePtr +xmlSchemaParseSimpleType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar *name; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + snprintf(buf, 99, "simpletype%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + if (name == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "simpleType has no name\n"); + return (NULL); + } + type = xmlSchemaAddType(ctxt, schema, name); + xmlFree(name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SIMPLE; + type->id = xmlGetProp(node, BAD_CAST "id"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "restriction")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseRestriction(ctxt, schema, child, 1); + child = child->next; + } else if (IS_SCHEMA(child, "list")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseList(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "union")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseUnion(ctxt, schema, child); + child = child->next; + } + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "SimpleType %s has unexpected content\n", + type->name); + } + + return (type); +} + + +/** + * xmlSchemaParseGroup: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Group definition + * *WARNING* this interface 
is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar *name, *ref = NULL, *refNs = NULL; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Group has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anongroup%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_GROUP; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->ref = ref; + type->refNs = refNs; + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "all")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + } + if (subtype != NULL) + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + 
"Group %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseAll: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema All definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "all%d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_ALL; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while (IS_SCHEMA(child, "element")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseElement(ctxt, schema, child, 0); + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "All %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseImport: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Import definition 
+ * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static int +xmlSchemaParseImport(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlNodePtr child = NULL; + xmlChar *namespace; + xmlChar *schemaLocation; + xmlChar *previous; + xmlURIPtr check; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (-1); + + namespace = xmlGetProp(node, BAD_CAST "namespace"); + if (namespace != NULL) { + check = xmlParseURI((const char *) namespace); + if (check == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Import namespace attribute is not an URI: %s\n", + namespace); + xmlFree(namespace); + return(-1); + } else { + xmlFreeURI(check); + } + } + schemaLocation = xmlGetProp(node, BAD_CAST "schemaLocation"); + if (schemaLocation != NULL) { + check = xmlParseURI((const char *) schemaLocation); + if (check == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Import schemaLocation attribute is not an URI: %s\n", + schemaLocation); + if (namespace != NULL) + xmlFree(namespace); + xmlFree(schemaLocation); + return(-1); + } else { + xmlFreeURI(check); + } + } + if (schema->schemasImports == NULL) { + schema->schemasImports = xmlHashCreate(10); + if (schema->schemasImports == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Internal: failed to build import table\n"); + if (namespace != NULL) + xmlFree(namespace); + if (schemaLocation != NULL) + xmlFree(schemaLocation); + return(-1); + } + } + if (namespace == NULL) { + previous = xmlHashLookup(schema->schemasImports, + XML_SCHEMAS_DEFAULT_NAMESPACE); + if (schemaLocation != NULL) { + if (previous != NULL) { + if 
(!xmlStrEqual(schemaLocation, previous)) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Redefining import for default namespace with a different URI: %s\n", + schemaLocation); + } + } else { + xmlHashAddEntry(schema->schemasImports, + XML_SCHEMAS_DEFAULT_NAMESPACE, schemaLocation); + } + } + } else { + previous = xmlHashLookup(schema->schemasImports, namespace); + if (schemaLocation != NULL) { + if (previous != NULL) { + if (!xmlStrEqual(schemaLocation, previous)) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Redefining import for namespace %s with a different URI: %s\n", + namespace, schemaLocation); + } + } else { + xmlHashAddEntry(schema->schemasImports, + namespace, schemaLocation); + } + } + } + + child = node->children; + while (IS_SCHEMA(child, "annotation")) { + /* + * the annotations here are simply discarded ... + */ + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Import has unexpected content\n"); + return(-1); + } + return(1); +} + +/** + * xmlSchemaParseChoice: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Choice definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. 
+ */ +static xmlSchemaTypePtr +xmlSchemaParseChoice(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "choice %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_CHOICE; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "element")) || + (IS_SCHEMA(child, "group")) || + (IS_SCHEMA(child, "any")) || + (IS_SCHEMA(child, "choice")) || + (IS_SCHEMA(child, "sequence"))) { + subtype = NULL; + if (IS_SCHEMA(child, "element")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseElement(ctxt, schema, child, 0); + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + } else if (IS_SCHEMA(child, "any")) { + subtype = xmlSchemaParseAny(ctxt, schema, child); + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + } + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Choice %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseSequence: + * @ctxt: a 
schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Sequence definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseSequence(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "sequence %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SEQUENCE; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "element")) || + (IS_SCHEMA(child, "group")) || + (IS_SCHEMA(child, "any")) || + (IS_SCHEMA(child, "choice")) || + (IS_SCHEMA(child, "sequence"))) { + subtype = NULL; + if (IS_SCHEMA(child, "element")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseElement(ctxt, schema, child, 0); + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + } else if (IS_SCHEMA(child, "any")) { + subtype = xmlSchemaParseAny(ctxt, schema, child); + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + } + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + 
last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Sequence %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseRestriction: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * @simple: is that part of a simple type. + * + * parse a XML schema Restriction definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseRestriction(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node, int simple) +{ + xmlSchemaTypePtr type, subtype; + xmlSchemaFacetPtr facet, lastfacet = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + oldcontainer = ctxt->container; + + snprintf((char *)name, 30, "restriction %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_RESTRICTION; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->base = xmlGetQNameProp(ctxt, node, "base", &(type->baseNs)); + if ((!simple) && (type->base == NULL)) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Restriction %s has no base\n", + type->name); + } + ctxt->container = name; + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + + if (IS_SCHEMA(child, "all")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } 
else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else if (IS_SCHEMA(child, "group")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else { + if (IS_SCHEMA(child, "simpleType")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + type->baseType = subtype; + } + /* + * Facets + */ + while ((IS_SCHEMA(child, "minInclusive")) || + (IS_SCHEMA(child, "minExclusive")) || + (IS_SCHEMA(child, "maxInclusive")) || + (IS_SCHEMA(child, "maxExclusive")) || + (IS_SCHEMA(child, "totalDigits")) || + (IS_SCHEMA(child, "fractionDigits")) || + (IS_SCHEMA(child, "pattern")) || + (IS_SCHEMA(child, "enumeration")) || + (IS_SCHEMA(child, "whiteSpace")) || + (IS_SCHEMA(child, "length")) || + (IS_SCHEMA(child, "maxLength")) || + (IS_SCHEMA(child, "minLength"))) { + facet = xmlSchemaParseFacet(ctxt, schema, child); + if (facet != NULL) { + if (lastfacet == NULL) { + type->facets = facet; + lastfacet = facet; + } else { + lastfacet->next = facet; + lastfacet = facet; + } + lastfacet->next = NULL; + } + child = child->next; + } + } + child = xmlSchemaParseAttrDecls(ctxt, schema, child, type); + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Restriction %s has unexpected content\n", + type->name); + } + ctxt->container = oldcontainer; + return (type); +} + +/** + * xmlSchemaParseExtension: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Extension definition + * *WARNING* this 
interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseExtension(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + oldcontainer = ctxt->container; + + snprintf((char *)name, 30, "extension %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_EXTENSION; + type->id = xmlGetProp(node, BAD_CAST "id"); + ctxt->container = name; + + type->base = xmlGetQNameProp(ctxt, node, "base", &(type->baseNs)); + if (type->base == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Extension %s has no base\n", + type->name); + } + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + + if (IS_SCHEMA(child, "all")) { + subtype = xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + } + if (subtype != NULL) + type->subtypes = subtype; + child = xmlSchemaParseAttrDecls(ctxt, schema, child, type); + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Extension %s has unexpected content\n", + type->name); + } + 
ctxt->container = oldcontainer; + return (type); +} + +/** + * xmlSchemaParseSimpleContent: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema SimpleContent definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseSimpleContent(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "complexContent %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SIMPLE_CONTENT; + type->id = xmlGetProp(node, BAD_CAST "id"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "restriction")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseRestriction(ctxt, schema, child, 0); + child = child->next; + } else if (IS_SCHEMA(child, "extension")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseExtension(ctxt, schema, child); + child = child->next; + } + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "SimpleContent %s has unexpected content\n", + type->name); + } + return (type); +} + +/** + * xmlSchemaParseComplexContent: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema ComplexContent definition + * *WARNING* this interface is highly subject to change + * + * Returns 
the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseComplexContent(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "complexContent %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_COMPLEX_CONTENT; + type->id = xmlGetProp(node, BAD_CAST "id"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "restriction")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseRestriction(ctxt, schema, child, 0); + child = child->next; + } else if (IS_SCHEMA(child, "extension")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseExtension(ctxt, schema, child); + child = child->next; + } + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "ComplexContent %s has unexpected content\n", + type->name); + } + return (type); +} + +/** + * xmlSchemaParseComplexType: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Complex Type definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseComplexType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar *name; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return 
(NULL); + + oldcontainer = ctxt->container; + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + snprintf(buf, 99, "anontype%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + if (name == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "complexType has no name\n"); + return (NULL); + } + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) { + xmlFree(name); + return (NULL); + } + type->node = node; + type->type = XML_SCHEMA_TYPE_COMPLEX; + type->id = xmlGetProp(node, BAD_CAST "id"); + ctxt->container = name; + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (IS_SCHEMA(child, "simpleContent")) { + type->subtypes = xmlSchemaParseSimpleContent(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "complexContent")) { + type->subtypes = xmlSchemaParseComplexContent(ctxt, schema, child); + child = child->next; + } else { + subtype = NULL; + + if (IS_SCHEMA(child, "all")) { + subtype = xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + } + if (subtype != NULL) + type->subtypes = subtype; + child = xmlSchemaParseAttrDecls(ctxt, schema, child, type); + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "ComplexType %s has unexpected content\n", + type->name); + } + ctxt->container = oldcontainer; + 
xmlFree(name); + return (type); +} + + +/** + * xmlSchemaParseSchema: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * + * parse a XML schema definition from a node set + * *WARNING* this interface is highly subject to change + * + * Returns the internal XML Schema structure built from the resource or + * NULL in case of error + */ +static xmlSchemaPtr +xmlSchemaParseSchema(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) +{ + xmlSchemaPtr schema = NULL; + xmlSchemaAnnotPtr annot; + xmlNodePtr child = NULL; + xmlChar *val; + + if ((ctxt == NULL) || (node == NULL)) + return (NULL); + + if (IS_SCHEMA(node, "schema")) { + schema = xmlSchemaNewSchema(ctxt); + if (schema == NULL) + return(NULL); + schema->targetNamespace = xmlGetProp(node, BAD_CAST "targetNamespace"); + schema->id = xmlGetProp(node, BAD_CAST "id"); + schema->version = xmlGetProp(node, BAD_CAST "version"); + val = xmlGetProp(node, BAD_CAST "elementFormDefault"); + if (val != NULL) { + if (xmlStrEqual(val, BAD_CAST "qualified")) + schema->flags |= XML_SCHEMAS_QUALIF_ELEM; + else if (!xmlStrEqual(val, BAD_CAST "unqualified")) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) { + ctxt->error(ctxt->userData, + "Invalid value %s for elementFormDefault\n", + val); + } + } + xmlFree(val); + } + val = xmlGetProp(node, BAD_CAST "attributeFormDefault"); + if (val != NULL) { + if (xmlStrEqual(val, BAD_CAST "qualified")) + schema->flags |= XML_SCHEMAS_QUALIF_ATTR; + else if (!xmlStrEqual(val, BAD_CAST "unqualified")) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) { + ctxt->error(ctxt->userData, + "Invalid value %s for elementFormDefault\n", + val); + } + } + xmlFree(val); + } + + child = node->children; + while ((IS_SCHEMA(child, "include")) || + (IS_SCHEMA(child, "import")) || + (IS_SCHEMA(child, "redefine")) || + (IS_SCHEMA(child, "annotation"))) { + if 
(IS_SCHEMA(child, "annotation")) { + annot = xmlSchemaParseAnnotation(ctxt, schema, child); + if (schema->annot == NULL) + schema->annot = annot; + else + xmlSchemaFreeAnnot(annot); + } else if (IS_SCHEMA(child, "include")) { + TODO + } else if (IS_SCHEMA(child, "import")) { + xmlSchemaParseImport(ctxt, schema, child); + } else if (IS_SCHEMA(child, "redefine")) { + TODO + } + child = child->next; + } + while (child != NULL) { + if (IS_SCHEMA(child, "complexType")) { + xmlSchemaParseComplexType(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "simpleType")) { + xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "element")) { + xmlSchemaParseElement(ctxt, schema, child, 1); + child = child->next; + } else if (IS_SCHEMA(child, "attribute")) { + xmlSchemaParseAttribute(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "attributeGroup")) { + xmlSchemaParseAttributeGroup(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "group")) { + xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "notation")) { + xmlSchemaParseNotation(ctxt, schema, child); + child = child->next; + } else { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: unexpected element %s here \n", + child->name); + child = child->next; + } + while (IS_SCHEMA(child, "annotation")) { + annot = xmlSchemaParseAnnotation(ctxt, schema, child); + if (schema->annot == NULL) + schema->annot = annot; + else + xmlSchemaFreeAnnot(annot); + child = child->next; + } + } + } +#ifdef DEBUG + if (schema == NULL) + xmlGenericError(xmlGenericErrorContext, + "xmlSchemaParse() failed\n"); +#endif + + return (schema); +} + +/************************************************************************ + * * + * Validating using Schemas * + * * + 
************************************************************************/ + +/************************************************************************ + * * + * Reading/Writing Schemas * + * * + ************************************************************************/ + +/** + * xmlSchemaNewParserCtxt: + * @URL: the location of the schema + * + * Create an XML Schemas parse context for that file/resource expected + * to contain an XML Schemas file. + * + * Returns the parser context or NULL in case of error + */ +xmlSchemaParserCtxtPtr +xmlSchemaNewParserCtxt(const char *URL) { + xmlSchemaParserCtxtPtr ret; + + if (URL == NULL) + return(NULL); + + ret = (xmlSchemaParserCtxtPtr) xmlMalloc(sizeof(xmlSchemaParserCtxt)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to allocate new schama parser context for %s\n", URL); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaParserCtxt)); + ret->URL = xmlStrdup((const xmlChar *)URL); + return (ret); +} + +/** + * xmlSchemaNewMemParserCtxt: + * @buffer: a pointer to a char array containing the schemas + * @size: the size of the array + * + * Create an XML Schemas parse context for that memory buffer expected + * to contain an XML Schemas file. 
+ * + * Returns the parser context or NULL in case of error + */ +xmlSchemaParserCtxtPtr +xmlSchemaNewMemParserCtxt(const char *buffer, int size) { + xmlSchemaParserCtxtPtr ret; + + if ((buffer == NULL) || (size <= 0)) + return(NULL); + + ret = (xmlSchemaParserCtxtPtr) xmlMalloc(sizeof(xmlSchemaParserCtxt)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to allocate new schama parser context\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaParserCtxt)); + ret->buffer = buffer; + ret->size = size; + return (ret); +} + +/** + * xmlSchemaFreeParserCtxt: + * @ctxt: the schema parser context + * + * Free the resources associated to the schema parser context + */ +void +xmlSchemaFreeParserCtxt(xmlSchemaParserCtxtPtr ctxt) { + if (ctxt == NULL) + return; + if (ctxt->URL != NULL) + xmlFree(ctxt->URL); + if (ctxt->doc != NULL) + xmlFreeDoc(ctxt->doc); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * Building the content models * + * * + ************************************************************************/ +/** + * xmlSchemaBuildAContentModel: + * @type: the schema type definition + * @ctxt: the schema parser context + * @name: the element name whose content is being built + * + * Generate the automata sequence needed for that type + */ +static void +xmlSchemaBuildAContentModel(xmlSchemaTypePtr type, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) { + if (type == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Found unexpected type = NULL in %s content model\n", + name); + return; + } + switch (type->type) { + case XML_SCHEMA_TYPE_ANY: + /* TODO : handle the namespace too */ + /* TODO : make that a specific transition type */ + TODO + ctxt->state = xmlAutomataNewTransition(ctxt->am, ctxt->state, + NULL, BAD_CAST "*", NULL); + break; + case XML_SCHEMA_TYPE_ELEMENT: { + xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type; + /* TODO : handle the namespace too */ + 
xmlAutomataStatePtr oldstate = ctxt->state; + if (elem->maxOccurs >= UNBOUNDED) { + if (elem->minOccurs > 1) { + xmlAutomataStatePtr tmp; + int counter; + + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, + oldstate, NULL); + oldstate = ctxt->state; + + counter = xmlAutomataNewCounter(ctxt->am, + elem->minOccurs - 1, UNBOUNDED); + + if (elem->refDecl != NULL) { + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, elem->name, type); + } + tmp = ctxt->state; + xmlAutomataNewCountedTrans(ctxt->am, tmp, oldstate, + counter); + ctxt->state = xmlAutomataNewCounterTrans(ctxt->am, tmp, + NULL, counter); + + } else { + if (elem->refDecl != NULL) { + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, elem->name, type); + } + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate); + if (elem->minOccurs == 0) { + /* basically an elem* */ + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + } + } else if ((elem->maxOccurs > 1) || (elem->minOccurs > 1)) { + xmlAutomataStatePtr tmp; + int counter; + + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, + oldstate, NULL); + oldstate = ctxt->state; + + counter = xmlAutomataNewCounter(ctxt->am, + elem->minOccurs - 1, elem->maxOccurs - 1); + + if (elem->refDecl != NULL) { + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, elem->name, type); + } + tmp = ctxt->state; + xmlAutomataNewCountedTrans(ctxt->am, tmp, oldstate, + counter); + ctxt->state = xmlAutomataNewCounterTrans(ctxt->am, tmp, + NULL, counter); + if (elem->minOccurs == 0) { + /* basically an elem? 
*/ + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + + } else { + if (elem->refDecl != NULL) { + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, elem->name, type); + } + if (elem->minOccurs == 0) { + /* basically an elem? */ + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + } + break; + } + case XML_SCHEMA_TYPE_SEQUENCE: { + xmlSchemaTypePtr subtypes; + + /* + * If max and min occurances are default (1) then + * simply iterate over the subtypes + */ + if ((type->minOccurs == 1 ) && (type->maxOccurs == 1)) { + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + } else { + xmlAutomataStatePtr oldstate = ctxt->state; + if (type->maxOccurs >= UNBOUNDED) { + if (type->minOccurs > 1) { + xmlAutomataStatePtr tmp; + int counter; + + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, + oldstate, NULL); + oldstate = ctxt->state; + + counter = xmlAutomataNewCounter(ctxt->am, + type->minOccurs - 1, UNBOUNDED); + + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + tmp = ctxt->state; + xmlAutomataNewCountedTrans(ctxt->am, tmp, oldstate, + counter); + ctxt->state = xmlAutomataNewCounterTrans(ctxt->am, tmp, + NULL, counter); + + } else { + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate); + if (type->minOccurs == 0) { + xmlAutomataNewEpsilon(ctxt->am, oldstate, + ctxt->state); + } + } + } else if ((type->maxOccurs > 1) || (type->minOccurs > 1)) { + xmlAutomataStatePtr tmp; + int counter; + + ctxt->state = xmlAutomataNewEpsilon(ctxt->am, + oldstate, NULL); + oldstate = ctxt->state; + + counter = 
xmlAutomataNewCounter(ctxt->am, + type->minOccurs - 1, type->maxOccurs - 1); + + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + tmp = ctxt->state; + xmlAutomataNewCountedTrans(ctxt->am, tmp, oldstate, + counter); + ctxt->state = xmlAutomataNewCounterTrans(ctxt->am, tmp, + NULL, counter); + if (type->minOccurs == 0) { + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + + } else { + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + if (type->minOccurs == 0) { + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + } + } + break; + } + case XML_SCHEMA_TYPE_CHOICE: { + xmlSchemaTypePtr subtypes; + xmlAutomataStatePtr start, end; + + start = ctxt->state; + end = xmlAutomataNewState(ctxt->am); + + /* + * iterate over the subtypes and remerge the end with an + * epsilon transition + */ + if (type->maxOccurs == 1) { + subtypes = type->subtypes; + while (subtypes != NULL) { + ctxt->state = start; + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end); + subtypes = subtypes->next; + } + } else { + int counter; + xmlAutomataStatePtr hop; + int maxOccurs = type->maxOccurs == UNBOUNDED ? + UNBOUNDED : type->maxOccurs - 1; + int minOccurs = type->minOccurs < 1 ? 0 : type->minOccurs - 1; + + /* + * use a counter to keep track of the number of transtions + * which went through the choice. 
+ */ + counter = xmlAutomataNewCounter(ctxt->am, minOccurs, maxOccurs); + hop = xmlAutomataNewState(ctxt->am); + + subtypes = type->subtypes; + while (subtypes != NULL) { + ctxt->state = start; + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, hop); + subtypes = subtypes->next; + } + xmlAutomataNewCountedTrans(ctxt->am, hop, start, counter); + xmlAutomataNewCounterTrans(ctxt->am, hop, end, counter); + } + if (type->minOccurs == 0) { + xmlAutomataNewEpsilon(ctxt->am, start, end); + } + ctxt->state = end; + break; + } + case XML_SCHEMA_TYPE_ALL: { + xmlAutomataStatePtr start; + xmlSchemaTypePtr subtypes; + xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type; + int lax; + + subtypes = type->subtypes; + if (subtypes == NULL) + break; + start = ctxt->state; + while (subtypes != NULL) { + ctxt->state = start; + elem = (xmlSchemaElementPtr) subtypes; + + /* TODO : handle the namespace too */ + xmlAutomataNewOnceTrans(ctxt->am, ctxt->state, ctxt->state, + elem->name, elem->minOccurs, elem->maxOccurs, + subtypes); + subtypes = subtypes->next; + } + lax = type->minOccurs == 0; + ctxt->state = xmlAutomataNewAllTrans(ctxt->am, ctxt->state, NULL, + lax); + break; + } + case XML_SCHEMA_TYPE_RESTRICTION: + if (type->subtypes != NULL) + xmlSchemaBuildAContentModel(type->subtypes, ctxt, name); + break; + case XML_SCHEMA_TYPE_EXTENSION: + if (type->baseType != NULL) { + xmlSchemaTypePtr subtypes; + + xmlSchemaBuildAContentModel(type->baseType, ctxt, name); + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + } else if (type->subtypes != NULL) + xmlSchemaBuildAContentModel(type->subtypes, ctxt, name); + break; + case XML_SCHEMA_TYPE_GROUP: + case XML_SCHEMA_TYPE_COMPLEX: + case XML_SCHEMA_TYPE_COMPLEX_CONTENT: + if (type->subtypes != NULL) + xmlSchemaBuildAContentModel(type->subtypes, ctxt, name); + break; + default: + 
xmlGenericError(xmlGenericErrorContext, + "Found unexpected type %d in %s content model\n", + type->type, name); + return; + } +} +/** + * xmlSchemaBuildContentModel: + * @typeDecl: the schema type definition + * @ctxt: the schema parser context + * + * Fixes the content model of the element. + */ +static void +xmlSchemaBuildContentModel(xmlSchemaElementPtr elem, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) { + xmlAutomataStatePtr start; + + if (elem->contModel != NULL) + return; + if (elem->subtypes == NULL) { + elem->contentType = XML_SCHEMA_CONTENT_ANY; + return; + } + if (elem->subtypes->type != XML_SCHEMA_TYPE_COMPLEX) + return; + if (elem->subtypes->contentType == XML_SCHEMA_CONTENT_BASIC) + return; + +#ifdef DEBUG_CONTENT + xmlGenericError(xmlGenericErrorContext, + "Building content model for %s\n", name); +#endif + + ctxt->am = xmlNewAutomata(); + if (ctxt->am == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot create automata for elem %s\n", name); + return; + } + start = ctxt->state = xmlAutomataGetInitState(ctxt->am); + xmlSchemaBuildAContentModel(elem->subtypes, ctxt, name); + xmlAutomataSetFinalState(ctxt->am, ctxt->state); + elem->contModel = xmlAutomataCompile(ctxt->am); + if (!xmlAutomataIsDeterminist(ctxt->am)) { + xmlGenericError(xmlGenericErrorContext, + "Content model of %s is not determinist:\n", name); + ctxt->err = XML_SCHEMAS_ERR_NOTDETERMINIST; + ctxt->state = NULL; + } else { +#ifdef DEBUG_CONTENT_REGEXP + xmlGenericError(xmlGenericErrorContext, + "Content model of %s:\n", name); + xmlRegexpPrint(stderr, elem->contModel); +#endif + ctxt->state = NULL; + } + xmlFreeAutomata(ctxt->am); + ctxt->am = NULL; +} + +/** + * xmlSchemaRefFixupCallback: + * @elem: the schema element context + * @ctxt: the schema parser context + * + * Free the resources associated to the schema parser context + */ +static void +xmlSchemaRefFixupCallback(xmlSchemaElementPtr elem, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name, + const xmlChar 
*context ATTRIBUTE_UNUSED, + const xmlChar *namespace ATTRIBUTE_UNUSED) +{ + if ((ctxt == NULL) || (elem == NULL)) + return; + if (elem->ref != NULL) { + xmlSchemaElementPtr elemDecl; + + if (elem->subtypes != NULL) { + xmlSchemaErrorContext(ctxt, NULL, elem->node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s have both ref and subtype\n", + name); + return; + } + elemDecl = xmlHashLookup2(ctxt->schema->elemDecl, + elem->ref, elem->refNs); + + if (elemDecl == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s ref to %s not found\n", + name, elem->ref); + return; + } + elem->refDecl = elemDecl; + } else if (elem->namedType != NULL) { + xmlSchemaTypePtr typeDecl; + + if (elem->subtypes != NULL) { + xmlSchemaErrorContext(ctxt, NULL, elem->node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s have both type and subtype\n", + name); + return; + } + typeDecl = xmlSchemaGetType(ctxt->schema, elem->namedType, + elem->namedTypeNs); + + if (typeDecl == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s type %s not found\n", + name, elem->namedType); + return; + } + elem->subtypes = typeDecl; + } +} + +/** + * xmlSchemaTypeFixup: + * @typeDecl: the schema type definition + * @ctxt: the schema parser context + * + * Fixes the content model of the type. 
+ */ +static void +xmlSchemaTypeFixup(xmlSchemaTypePtr typeDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) +{ + if (name == NULL) + name = typeDecl->name; + if (typeDecl->contentType == XML_SCHEMA_CONTENT_UNKNOWN) { + switch (typeDecl->type) { + case XML_SCHEMA_TYPE_SIMPLE_CONTENT: { + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + typeDecl->contentType = typeDecl->subtypes->contentType; + break; + } + case XML_SCHEMA_TYPE_RESTRICTION: { + if (typeDecl->subtypes != NULL) + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + + if (typeDecl->base != NULL) { + xmlSchemaTypePtr baseType; + + baseType = xmlSchemaGetType(ctxt->schema, typeDecl->base, + typeDecl->baseNs); + if (baseType == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: type %s base type %s not found\n", + name, typeDecl->base); + } + typeDecl->baseType = baseType; + } + if (typeDecl->subtypes == NULL) + /* 1.1.1 */ + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->subtypes == NULL) && + ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_ALL) || + (typeDecl->subtypes->type == XML_SCHEMA_TYPE_SEQUENCE))) + /* 1.1.2 */ + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_CHOICE) && + (typeDecl->subtypes->subtypes == NULL)) + /* 1.1.3 */ + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + else { + /* 1.2 and 2.X are applied at the other layer */ + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + } + break; + } + case XML_SCHEMA_TYPE_EXTENSION: { + xmlSchemaContentType explicitContentType; + xmlSchemaTypePtr base; + + if (typeDecl->base != NULL) { + xmlSchemaTypePtr baseType; + + baseType = xmlSchemaGetType(ctxt->schema, typeDecl->base, + typeDecl->baseNs); + if (baseType == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: type %s base type %s not found\n", + name, typeDecl->base); + } + 
typeDecl->baseType = baseType; + } + if (typeDecl->subtypes != NULL) + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + + explicitContentType = XML_SCHEMA_CONTENT_ELEMENTS; + if (typeDecl->subtypes == NULL) + /* 1.1.1 */ + explicitContentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->subtypes == NULL) && + ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_ALL) || + (typeDecl->subtypes->type == XML_SCHEMA_TYPE_SEQUENCE))) + /* 1.1.2 */ + explicitContentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_CHOICE) && + (typeDecl->subtypes->subtypes == NULL)) + /* 1.1.3 */ + explicitContentType = XML_SCHEMA_CONTENT_EMPTY; + + base = xmlSchemaGetType(ctxt->schema, typeDecl->base, + typeDecl->baseNs); + if (base == NULL) { + xmlSchemaErrorContext(ctxt, NULL, typeDecl->node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: base type %s of type %s not found\n", + typeDecl->base, name); + return; + } + xmlSchemaTypeFixup(base, ctxt, NULL); + if (explicitContentType == XML_SCHEMA_CONTENT_EMPTY) { + /* 2.1 */ + typeDecl->contentType = base->contentType; + } else if (base->contentType == XML_SCHEMA_CONTENT_EMPTY) { + /* 2.2 imbitable ! */ + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + } else { + /* 2.3 imbitable pareil ! 
*/ + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + } + break; + } + case XML_SCHEMA_TYPE_COMPLEX: { + if (typeDecl->subtypes == NULL) { + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + } else { + if (typeDecl->flags & XML_SCHEMAS_TYPE_MIXED) + typeDecl->contentType = XML_SCHEMA_CONTENT_MIXED; + else { + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + typeDecl->contentType = typeDecl->subtypes->contentType; + } + } + break; + } + case XML_SCHEMA_TYPE_COMPLEX_CONTENT: { + if (typeDecl->subtypes == NULL) { + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + } else { + if (typeDecl->flags & XML_SCHEMAS_TYPE_MIXED) + typeDecl->contentType = XML_SCHEMA_CONTENT_MIXED; + else { + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + typeDecl->contentType = typeDecl->subtypes->contentType; + } + } + break; + } + case XML_SCHEMA_TYPE_SEQUENCE: + case XML_SCHEMA_TYPE_GROUP: + case XML_SCHEMA_TYPE_ALL: + case XML_SCHEMA_TYPE_CHOICE: + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + break; + case XML_SCHEMA_TYPE_BASIC: + case XML_SCHEMA_TYPE_ANY: + case XML_SCHEMA_TYPE_FACET: + case XML_SCHEMA_TYPE_SIMPLE: + case XML_SCHEMA_TYPE_UR: + case XML_SCHEMA_TYPE_ELEMENT: + case XML_SCHEMA_TYPE_ATTRIBUTE: + case XML_SCHEMA_TYPE_ATTRIBUTEGROUP: + case XML_SCHEMA_TYPE_NOTATION: + case XML_SCHEMA_TYPE_LIST: + case XML_SCHEMA_TYPE_UNION: + case XML_SCHEMA_FACET_MININCLUSIVE: + case XML_SCHEMA_FACET_MINEXCLUSIVE: + case XML_SCHEMA_FACET_MAXINCLUSIVE: + case XML_SCHEMA_FACET_MAXEXCLUSIVE: + case XML_SCHEMA_FACET_TOTALDIGITS: + case XML_SCHEMA_FACET_FRACTIONDIGITS: + case XML_SCHEMA_FACET_PATTERN: + case XML_SCHEMA_FACET_ENUMERATION: + case XML_SCHEMA_FACET_WHITESPACE: + case XML_SCHEMA_FACET_LENGTH: + case XML_SCHEMA_FACET_MAXLENGTH: + case XML_SCHEMA_FACET_MINLENGTH: + typeDecl->contentType = XML_SCHEMA_CONTENT_SIMPLE; + break; + } + } +#ifdef DEBUG_TYPE + if (typeDecl->node != NULL) { + xmlGenericError(xmlGenericErrorContext, + "Type of %s : %s:%d :", name, 
typeDecl->node->doc->URL, + xmlGetLineNo(typeDecl->node)); + } else { + xmlGenericError(xmlGenericErrorContext, + "Type of %s :", name); + } + switch (typeDecl->contentType) { + case XML_SCHEMA_CONTENT_SIMPLE: + xmlGenericError(xmlGenericErrorContext, + "simple\n"); break; + case XML_SCHEMA_CONTENT_ELEMENTS: + xmlGenericError(xmlGenericErrorContext, + "elements\n"); break; + case XML_SCHEMA_CONTENT_UNKNOWN: + xmlGenericError(xmlGenericErrorContext, + "unknown !!!\n"); break; + case XML_SCHEMA_CONTENT_EMPTY: + xmlGenericError(xmlGenericErrorContext, + "empty\n"); break; + case XML_SCHEMA_CONTENT_MIXED: + xmlGenericError(xmlGenericErrorContext, + "mixed\n"); break; + case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS: + xmlGenericError(xmlGenericErrorContext, + "mixed or elems\n"); break; + case XML_SCHEMA_CONTENT_BASIC: + xmlGenericError(xmlGenericErrorContext, + "basic\n"); break; + default: + xmlGenericError(xmlGenericErrorContext, + "not registered !!!\n"); break; + } +#endif +} + +/** + * xmlSchemaCheckDefaults: + * @typeDecl: the schema type definition + * @ctxt: the schema parser context + * + * Checks the default values types, especially for facets + */ +static void +xmlSchemaCheckDefaults(xmlSchemaTypePtr typeDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) +{ + static xmlSchemaTypePtr nonNegativeIntegerType = NULL; + if (name == NULL) + name = typeDecl->name; + if (nonNegativeIntegerType == NULL) { + nonNegativeIntegerType = xmlSchemaGetPredefinedType( + BAD_CAST "nonNegativeInteger", xmlSchemaNs); + } + if (typeDecl->type == XML_SCHEMA_TYPE_RESTRICTION) { + if (typeDecl->facets != NULL) { + xmlSchemaFacetPtr facet = typeDecl->facets; + while (facet != NULL) { + switch (facet->type) { + case XML_SCHEMA_FACET_MININCLUSIVE: + case XML_SCHEMA_FACET_MINEXCLUSIVE: + case XML_SCHEMA_FACET_MAXINCLUSIVE: + case XML_SCHEMA_FACET_MAXEXCLUSIVE: { + /* + * Okay we need to validate the value + * at that point. 
+ */ + xmlSchemaValidCtxtPtr vctxt; + + vctxt = xmlSchemaNewValidCtxt(NULL); + if (vctxt == NULL) + break; + xmlSchemaValidateSimpleValue(vctxt, typeDecl, + facet->value); + facet->val = vctxt->value; + vctxt->value = NULL; + if (facet->val == NULL) { + /* error code */ + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s facet value %s invalid\n", + name, facet->value); + } + xmlSchemaFreeValidCtxt(vctxt); + break; + } + case XML_SCHEMA_FACET_ENUMERATION: { + /* + * Okay we need to validate the value + * at that point. + */ + xmlSchemaValidCtxtPtr vctxt; + int ret; + + vctxt = xmlSchemaNewValidCtxt(NULL); + if (vctxt == NULL) + break; + ret = xmlSchemaValidateSimpleValue(vctxt, typeDecl, + facet->value); + if (ret != 0) { + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s enumeration value %s invalid\n", + name, facet->value); + } + xmlSchemaFreeValidCtxt(vctxt); + break; + } + case XML_SCHEMA_FACET_PATTERN: + facet->regexp = xmlRegexpCompile(facet->value); + if (facet->regexp == NULL) { + /* error code */ + ctxt->error(ctxt->userData, + "Schemas: type %s facet regexp %s invalid\n", + name, facet->value); + } + break; + case XML_SCHEMA_FACET_TOTALDIGITS: + case XML_SCHEMA_FACET_FRACTIONDIGITS: + case XML_SCHEMA_FACET_LENGTH: + case XML_SCHEMA_FACET_MAXLENGTH: + case XML_SCHEMA_FACET_MINLENGTH: { + int ret; + + ret = xmlSchemaValidatePredefinedType( + nonNegativeIntegerType, facet->value, + &facet->val); + if (ret != 0) { + /* error code */ + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s facet value %s invalid\n", + name, facet->value); + } + break; + } + case XML_SCHEMA_FACET_WHITESPACE: { + if (xmlStrEqual(facet->value, BAD_CAST"preserve")) { + facet->whitespace = XML_SCHEMAS_FACET_PRESERVE; + } else if (xmlStrEqual(facet->value, + BAD_CAST"replace")) { + facet->whitespace = 
XML_SCHEMAS_FACET_REPLACE; + } else if (xmlStrEqual(facet->value, + BAD_CAST"collapse")) { + facet->whitespace = XML_SCHEMAS_FACET_COLLAPSE; + } else { + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s whiteSpace value %s invalid\n", + name, facet->value); + } + } + default: + break; + } + facet = facet->next; + } + } + } +} + +/** + * xmlSchemaAttrGrpFixup: + * @attrgrpDecl: the schema attribute definition + * @ctxt: the schema parser context + * @name: the attribute name + * + * Fixes finish doing the computations on the attributes definitions + */ +static void +xmlSchemaAttrGrpFixup(xmlSchemaAttributeGroupPtr attrgrpDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) +{ + if (name == NULL) + name = attrgrpDecl->name; + if (attrgrpDecl->attributes != NULL) + return; + if (attrgrpDecl->ref != NULL) { + xmlSchemaAttributeGroupPtr ref; + + ref = xmlHashLookup2(ctxt->schema->attrgrpDecl, attrgrpDecl->ref, + attrgrpDecl->refNs); + if (ref == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: attribute group %s reference %s not found\n", + name, attrgrpDecl->ref); + return; + } + xmlSchemaAttrGrpFixup(ref, ctxt, NULL); + attrgrpDecl->attributes = ref->attributes; + } else { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: attribute %s has no attributes nor reference\n", + name); + } +} + +/** + * xmlSchemaAttrFixup: + * @attrDecl: the schema attribute definition + * @ctxt: the schema parser context + * @name: the attribute name + * + * Fixes finish doing the computations on the attributes definitions + */ +static void +xmlSchemaAttrFixup(xmlSchemaAttributePtr attrDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) +{ + if (name == NULL) + name = attrDecl->name; + if (attrDecl->subtypes != NULL) + return; + if (attrDecl->typeName != NULL) { + xmlSchemaTypePtr type; + + type = xmlSchemaGetType(ctxt->schema, 
attrDecl->typeName, + attrDecl->typeNs); + if (type == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: attribute %s type %s not found\n", + name, attrDecl->typeName); + } + attrDecl->subtypes = type; + } else if (attrDecl->ref != NULL) { + xmlSchemaAttributePtr ref; + + ref = xmlHashLookup2(ctxt->schema->attrDecl, attrDecl->ref, + attrDecl->refNs); + if (ref == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: attribute %s reference %s not found\n", + name, attrDecl->ref); + return; + } + xmlSchemaAttrFixup(ref, ctxt, NULL); + attrDecl->subtypes = ref->subtypes; + } else { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: attribute %s has no type nor reference\n", + name); + } +} + +/** + * xmlSchemaParse: + * @ctxt: a schema validation context + * + * parse a schema definition resource and build an internal + * XML Shema struture which can be used to validate instances. 
+ * *WARNING* this interface is highly subject to change + * + * Returns the internal XML Schema structure built from the resource or + * NULL in case of error + */ +xmlSchemaPtr +xmlSchemaParse(xmlSchemaParserCtxtPtr ctxt) +{ + xmlSchemaPtr ret = NULL; + xmlDocPtr doc; + xmlNodePtr root, cur, delete; + + xmlSchemaInitTypes(); + + if (ctxt == NULL) + return (NULL); + + ctxt->counter = 0; + ctxt->container = NULL; + + /* + * First step is to parse the input document into an DOM/Infoset + */ + if (ctxt->URL != NULL) { + doc = xmlParseFile((const char *) ctxt->URL); + if (doc == NULL) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "xmlSchemaParse: could not load %s\n", ctxt->URL); + return (NULL); + } + } else if (ctxt->buffer != NULL) { + doc = xmlParseMemory(ctxt->buffer, ctxt->size); + if (doc == NULL) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "xmlSchemaParse: could not parse schemas\n"); + return (NULL); + } + doc->URL = xmlStrdup(BAD_CAST "in_memory_buffer"); + ctxt->URL = xmlStrdup(BAD_CAST "in_memory_buffer"); + } else { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "xmlSchemaParse: nothing to parse\n"); + return (NULL); + } + + /* + * Then extract the root and Schema parse it + */ + root = xmlDocGetRootElement(doc); + if (root == NULL) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "xmlSchemaParse: %s is empty\n", + ctxt->URL); + return (NULL); + } + + /* + * Remove all the blank text nodes + */ + delete = NULL; + cur = root; + while (cur != NULL) { + if (delete != NULL) { + xmlUnlinkNode(delete); + xmlFreeNode(delete); + delete = NULL; + } + if (cur->type == XML_TEXT_NODE) { + if (IS_BLANK_NODE(cur)) { + if (xmlNodeGetSpacePreserve(cur) != 1) { + delete = cur; + } + } + } else if ((cur->type != XML_ELEMENT_NODE) && + (cur->type != XML_CDATA_SECTION_NODE)) { + delete = cur; + goto skip_children; + } + + /* + * Skip to next node + */ + if (cur->children != NULL) { + if ((cur->children->type != 
XML_ENTITY_DECL) && + (cur->children->type != XML_ENTITY_REF_NODE) && + (cur->children->type != XML_ENTITY_NODE)) { + cur = cur->children; + continue; + } + } +skip_children: + if (cur->next != NULL) { + cur = cur->next; + continue; + } + + do { + cur = cur->parent; + if (cur == NULL) + break; + if (cur == root) { + cur = NULL; + break; + } + if (cur->next != NULL) { + cur = cur->next; + break; + } + } while (cur != NULL); + } + if (delete != NULL) { + xmlUnlinkNode(delete); + xmlFreeNode(delete); + delete = NULL; + } + + /* + * Then do the parsing for good + */ + ret = xmlSchemaParseSchema(ctxt, root); + if (ret == NULL) + return(NULL); + ret->doc = doc; + + /* + * Then fix all the references. + */ + ctxt->schema = ret; + xmlHashScanFull(ret->elemDecl, + (xmlHashScannerFull) xmlSchemaRefFixupCallback, ctxt); + + /* + * Then fixup all types properties + */ + xmlHashScan(ret->typeDecl, (xmlHashScanner) xmlSchemaTypeFixup, ctxt); + + /* + * Then build the content model for all elements + */ + xmlHashScan(ret->elemDecl, + (xmlHashScanner) xmlSchemaBuildContentModel, ctxt); + + /* + * Then check the defaults part of the type like facets values + */ + xmlHashScan(ret->typeDecl, (xmlHashScanner) xmlSchemaCheckDefaults, ctxt); + + /* + * Then fixup all attributes declarations + */ + xmlHashScan(ret->attrDecl, (xmlHashScanner) xmlSchemaAttrFixup, ctxt); + + /* + * Then fixup all attributes group declarations + */ + xmlHashScan(ret->attrgrpDecl, (xmlHashScanner) xmlSchemaAttrGrpFixup, ctxt); + + return (ret); +} + +/** + * xmlSchemaSetParserErrors: + * @ctxt: a schema validation context + * @err: the error callback + * @warn: the warning callback + * @ctx: contextual data for the callbacks + * + * Set the callback functions used to handle errors for a validation context + */ +void +xmlSchemaSetParserErrors(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, void *ctx) { + if (ctxt == NULL) + return; + ctxt->error = err; + 
ctxt->warning = warn; + ctxt->userData = ctx; +} + +/************************************************************************ + * * + * Simple type validation * + * * + ************************************************************************/ + +/** + * xmlSchemaValidateSimpleValue: + * @ctxt: a schema validation context + * @type: the type declaration + * @value: the value to validate + * + * Validate a value against a simple type + * + * Returns 0 if the value is valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateSimpleValue(xmlSchemaValidCtxtPtr ctxt, + xmlSchemaTypePtr type, + xmlChar *value) { + int ret = 0; + /* + * First normalize the value accordingly to Schema Datatype + * 4.3.6 whiteSpace definition of the whiteSpace facet of type + */ + /* + * Then check the normalized value against the lexical space of the + * type. + */ + if (type->type == XML_SCHEMA_TYPE_BASIC) { + if (ctxt->value != NULL) { + xmlSchemaFreeValue(ctxt->value); + ctxt->value = NULL; + } + ret = xmlSchemaValidatePredefinedType(type, value, &(ctxt->value)); + } else if (type->type == XML_SCHEMA_TYPE_RESTRICTION) { + xmlSchemaTypePtr base; + xmlSchemaFacetPtr facet; + int tmp; + + base = type->baseType; + if (base != NULL) { + ret = xmlSchemaValidateSimpleValue(ctxt, base, value); + } else if (type->subtypes != NULL) { + + } + /* + * Do not validate facets when working on building the Schemas + */ + if (ctxt->schema != NULL) { + if (ret == 0) { + facet = type->facets; + if ((type->type == XML_SCHEMA_TYPE_RESTRICTION) && + (facet != NULL) && + (facet->type == XML_SCHEMA_FACET_ENUMERATION)) { + while (facet != NULL) { + ret = 1; + + tmp = xmlSchemaValidateFacet(base, facet, value, + ctxt->value); + if (tmp == 0) { + ret = 0; + break; + } + facet = facet->next; + } + } else { + while (facet != NULL) { + tmp = xmlSchemaValidateFacet(base, facet, value, + ctxt->value); + if (tmp != 0) + ret = tmp; + facet = facet->next; 
+ } + } + } + } + } else if (type->type == XML_SCHEMA_TYPE_SIMPLE) { + xmlSchemaTypePtr base; + + base = type->subtypes; + if (base != NULL) { + ret = xmlSchemaValidateSimpleValue(ctxt, base, value); + } else { + TODO + } + } else if (type->type == XML_SCHEMA_TYPE_LIST) { + xmlSchemaTypePtr base; + xmlChar *cur, *end, tmp; + int ret2; + + base = type->subtypes; + if (base == NULL) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) { + xmlSchemaErrorContext(NULL, ctxt->schema, type->node, NULL); + ctxt->error(ctxt->userData, + "Internal: List type %s has no base type\n", + type->name); + } + return(-1); + } + cur = value; + do { + while (IS_BLANK(*cur)) cur++; + end = cur; + while ((*end != 0) && (!(IS_BLANK(*end)))) end++; + if (end == cur) + break; + tmp = *end; + *end = 0; + ret2 = xmlSchemaValidateSimpleValue(ctxt, base, cur); + if (ret2 != 0) + ret = 1; + *end = tmp; + cur = end; + } while (*cur != 0); + } else { + TODO + } + return(ret); +} + +/************************************************************************ + * * + * DOM Validation code * + * * + ************************************************************************/ + +static int xmlSchemaValidateContent(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr node); +static int xmlSchemaValidateAttributes(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr elem, xmlSchemaAttributePtr attributes); +static int xmlSchemaValidateType(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr elem, xmlSchemaElementPtr elemDecl, xmlSchemaTypePtr type); + +/** + * xmlSchemaRegisterAttributes: + * @ctxt: a schema validation context + * @attrs: a list of attributes + * + * Register the list of attributes as the set to be validated on that element + * + * Returns -1 in case of error, 0 otherwise + */ +static int +xmlSchemaRegisterAttributes(xmlSchemaValidCtxtPtr ctxt, + xmlAttrPtr attrs) { + while (attrs != NULL) { + if ((attrs->ns != NULL) && + (xmlStrEqual(attrs->ns->href, xmlSchemaInstanceNs))) { + attrs = attrs->next; + continue; 
+ } + if (ctxt->attrNr >= ctxt->attrMax) { + xmlSchemaAttrStatePtr tmp; + + ctxt->attrMax *= 2; + tmp = (xmlSchemaAttrStatePtr) + xmlRealloc(ctxt->attr, ctxt->attrMax * + sizeof(xmlSchemaAttrState)); + if (tmp == NULL) { + ctxt->attrMax /= 2; + return(-1); + } + ctxt->attr = tmp; + } + ctxt->attr[ctxt->attrNr].attr = attrs; + ctxt->attr[ctxt->attrNr].state = XML_SCHEMAS_ATTR_UNKNOWN; + ctxt->attrNr++; + attrs = attrs->next; + } + return(0); +} + +/** + * xmlSchemaCheckAttributes: + * @ctxt: a schema validation context + * @node: the node carrying it. + * + * Check that the registered set of attributes on the current node + * has been properly validated. + * + * Returns 0 if validity constraints are met, 1 otherwise. + */ +static int +xmlSchemaCheckAttributes(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + int ret = 0; + int i; + + for (i = ctxt->attrBase;i < ctxt->attrNr;i++) { + if (ctxt->attr[i].attr == NULL) + break; + if (ctxt->attr[i].state == XML_SCHEMAS_ATTR_UNKNOWN) { + ret = 1; + ctxt->err = XML_SCHEMAS_ERR_ATTRUNKNOWN; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Attribute %s on %s is unknown\n", + ctxt->attr[i].attr->name, + node->name); + } + } + return(ret); +} + +/** + * xmlSchemaValidateSimpleContent: + * @ctxt: a schema validation context + * @elem: an element + * @type: the type declaration + * + * Validate the content of an element expected to be a simple type + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateSimpleContent(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr node ATTRIBUTE_UNUSED) { + xmlNodePtr child; + xmlSchemaTypePtr type, base; + xmlChar *value; + int ret = 0, tmp; + + child = ctxt->node; + type = ctxt->type; + + /* + * Validation Rule: Element Locally Valid (Type): 3.1.3 + */ + value = xmlNodeGetContent(child); + /* xmlSchemaValidateSimpleValue(ctxt, type, value); */ + switch (type->type) { + case XML_SCHEMA_TYPE_RESTRICTION: { + xmlSchemaFacetPtr facet; + + base = type->baseType; + if (base != NULL) { + ret = xmlSchemaValidateSimpleValue(ctxt, base, value); + } else { + TODO + } + if (ret == 0) { + facet = type->facets; + while (facet != NULL) { + tmp = xmlSchemaValidateFacet(base, facet, value, + ctxt->value); + if (tmp != 0) + ret = tmp; + facet = facet->next; + } + } + break; + } + default: + TODO + } + if (value != NULL) + xmlFree(value); + + return(ret); +} + +/** + * xmlSchemaValidateCheckNodeList + * @nodelist: the list of nodes + * + * Check the node list is only made of text nodes and entities pointing + * to text nodes + * + * Returns 1 if true, 0 if false and -1 in case of error + */ +static int +xmlSchemaValidateCheckNodeList(xmlNodePtr nodelist) { + while (nodelist != NULL) { + if (nodelist->type == XML_ENTITY_REF_NODE) { + TODO /* implement recursion in the entity content */ + } + if ((nodelist->type != XML_TEXT_NODE) && + (nodelist->type != XML_COMMENT_NODE) && + (nodelist->type != XML_PI_NODE) && + (nodelist->type != XML_PI_NODE)) { + return(0); + } + nodelist = nodelist->next; + } + return(1); +} + +/** + * xmlSchemaSkipIgnored: + * @ctxt: a schema validation context + * @type: the current type context + * @node: the top node. + * + * Skip ignorable nodes in that context + * + * Returns the new sibling + * number otherwise and -1 in case of internal or API error. 
+ */ +static xmlNodePtr +xmlSchemaSkipIgnored(xmlSchemaValidCtxtPtr ctxt ATTRIBUTE_UNUSED, + xmlSchemaTypePtr type, + xmlNodePtr node) { + int mixed = 0; + /* + * TODO complete and handle entities + */ + mixed = ((type->contentType == XML_SCHEMA_CONTENT_MIXED) || + (type->contentType == XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS)); + while ((node != NULL) && + ((node->type == XML_COMMENT_NODE) || + ((mixed == 1) && (node->type == XML_TEXT_NODE)) || + (((type->contentType == XML_SCHEMA_CONTENT_ELEMENTS) && + (node->type == XML_TEXT_NODE) && + (IS_BLANK_NODE(node)))))) { + node = node->next; + } + return(node); +} + +/** + * xmlSchemaValidateCallback: + * @ctxt: a schema validation context + * @name: the name of the element detected (might be NULL) + * @type: the type + * + * A transition has been made in the automata associated to an element + * content model + */ +static void +xmlSchemaValidateCallback(xmlSchemaValidCtxtPtr ctxt, + const xmlChar *name ATTRIBUTE_UNUSED, + xmlSchemaTypePtr type, + xmlNodePtr node) { + xmlSchemaTypePtr oldtype = ctxt->type; + xmlNodePtr oldnode = ctxt->node; +#ifdef DEBUG_CONTENT + xmlGenericError(xmlGenericErrorContext, + "xmlSchemaValidateCallback: %s, %s, %s\n", + name, type->name, node->name); +#endif + ctxt->type = type; + ctxt->node = node; + xmlSchemaValidateContent(ctxt, node); + ctxt->type = oldtype; + ctxt->node = oldnode; +} + + +#if 0 +/** + * xmlSchemaValidateSimpleRestrictionType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of a restriction type. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateSimpleRestrictionType(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr node) +{ + xmlNodePtr child; + xmlSchemaTypePtr type; + int ret; + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateSimpleRestrictionType %s\n", + node->name); + return (-1); + } + /* + * Only text and text based entities references shall be found there + */ + ret = xmlSchemaValidateCheckNodeList(child); + if (ret < 0) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateSimpleType %s content\n", + node->name); + return (-1); + } else if (ret == 0) { + ctxt->err = XML_SCHEMAS_ERR_NOTSIMPLE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s content is not a simple type\n", + node->name); + return (-1); + } + ctxt->type = type->subtypes; + xmlSchemaValidateContent(ctxt, node); + ctxt->type = type; + return (ret); +} +#endif + +/** + * xmlSchemaValidateSimpleType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an simple type. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateSimpleType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type; + xmlAttrPtr attr; + int ret; + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateSimpleType %s\n", + node->name); + return(-1); + } + /* + * Only text and text based entities references shall be found there + */ + ret = xmlSchemaValidateCheckNodeList(child); + if (ret < 0) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateSimpleType %s content\n", + node->name); + return(-1); + } else if (ret == 0) { + ctxt->err = XML_SCHEMAS_ERR_NOTSIMPLE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s content is not a simple type\n", + node->name); + return(-1); + } + /* + * Validation Rule: Element Locally Valid (Type): 3.1.1 + */ + attr = node->properties; + while (attr != NULL) { + if ((attr->ns == NULL) || + (!xmlStrEqual(attr->ns->href, xmlSchemaInstanceNs)) || + ((!xmlStrEqual(attr->name, BAD_CAST"type")) && + (!xmlStrEqual(attr->name, BAD_CAST"nil")) && + (!xmlStrEqual(attr->name, BAD_CAST"schemasLocation")) && + (!xmlStrEqual(attr->name, BAD_CAST"noNamespaceSchemaLocation")))) { + ctxt->err = XML_SCHEMAS_ERR_INVALIDATTR; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: attribute %s should not be present\n", + child->name, attr->name); + return(ctxt->err); + } + } + + ctxt->type = type->subtypes; + ret = xmlSchemaValidateSimpleContent(ctxt, node); + ctxt->type = type; + return(ret); +} + +/** + * xmlSchemaValidateElementType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an element type. 
+ * Validation Rule: Element Locally Valid (Complex Type) + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateElementType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type; + xmlRegExecCtxtPtr oldregexp; /* cont model of the parent */ + xmlSchemaElementPtr decl; + int ret, attrBase; + + oldregexp = ctxt->regexp; + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateElementType\n", + node->name); + return(-1); + } + if (child == NULL) { + if (type->minOccurs > 0) { + ctxt->err = XML_SCHEMAS_ERR_MISSING; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: missing child %s\n", + node->name, type->name); + } + return(ctxt->err); + } + + /* + * Verify the element matches + */ + if (!xmlStrEqual(child->name, type->name)) { + ctxt->err = XML_SCHEMAS_ERR_WRONGELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: missing child %s found %s\n", + node->name, type->name, child->name); + return(ctxt->err); + } + /* + * Verify the attributes + */ + attrBase = ctxt->attrBase; + ctxt->attrBase = ctxt->attrNr; + xmlSchemaRegisterAttributes(ctxt, child->properties); + xmlSchemaValidateAttributes(ctxt, child, type->attributes); + /* + * Verify the element content recursively + */ + decl = (xmlSchemaElementPtr) type; + oldregexp = ctxt->regexp; + if (decl->contModel != NULL) { + ctxt->regexp = xmlRegNewExecCtxt(decl->contModel, + (xmlRegExecCallbacks) xmlSchemaValidateCallback, + ctxt); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s\n", node->name); +#endif + } + xmlSchemaValidateType(ctxt, child, (xmlSchemaElementPtr)type, + type->subtypes); + + if (decl->contModel != NULL) { + ret = 
xmlRegExecPushString(ctxt->regexp, NULL, NULL); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s : %d\n", node->name, ret); +#endif + if (ret == 0) { + ctxt->err = XML_SCHEMAS_ERR_ELEMCONT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failed\n", + node->name); + } else if (ret < 0) { + ctxt->err = XML_SCHEMAS_ERR_ELEMCONT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failure\n", + node->name); +#ifdef DEBUG_CONTENT + } else { + xmlGenericError(xmlGenericErrorContext, + "Element %s content check succeeded\n", node->name); + +#endif + } + xmlRegFreeExecCtxt(ctxt->regexp); + } + /* + * Verify that all attributes were Schemas-validated + */ + xmlSchemaCheckAttributes(ctxt, node); + ctxt->attrNr = ctxt->attrBase; + ctxt->attrBase = attrBase; + + ctxt->regexp = oldregexp; + + ctxt->node = child; + ctxt->type = type; + return(ctxt->err); +} + +/** + * xmlSchemaValidateBasicType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an element expected to be a basic type type + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateBasicType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + int ret; + xmlNodePtr child, cur; + xmlSchemaTypePtr type; + xmlChar *value; /* lexical representation */ + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateBasicType\n", + node->name); + return(-1); + } + /* + * First check the content model of the node. 
+ */ + cur = child; + while (cur != NULL) { + switch (cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + TODO + break; + case XML_ELEMENT_NODE: + ctxt->err = XML_SCHEMAS_ERR_INVALIDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: child %s should not be present\n", + node->name, cur->name); + return(ctxt->err); + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NAMESPACE_DECL: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + ctxt->err = XML_SCHEMAS_ERR_INVALIDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: node type %d unexpected here\n", + node->name, cur->type); + return(ctxt->err); + } + cur = cur->next; + } + if (child == NULL) + value = NULL; + else + value = xmlNodeGetContent(child->parent); + + if (ctxt->value != NULL) { + xmlSchemaFreeValue(ctxt->value); + ctxt->value = NULL; + } + ret = xmlSchemaValidatePredefinedType(type, value, &(ctxt->value)); + if (value != NULL) + xmlFree(value); + if (ret != 0) { + ctxt->error(ctxt->userData, + "Element %s: failed to validate basic type %s\n", + node->name, type->name); + } + return(ret); +} + +/** + * xmlSchemaValidateComplexType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an element expected to be a complex type type + * xmlschema-1.html#cvc-complex-type + * Validation Rule: Element Locally Valid (Complex Type) + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateComplexType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type, subtype; + int ret; + + child = ctxt->node; + type = ctxt->type; + + switch (type->contentType) { + case XML_SCHEMA_CONTENT_EMPTY: + if (child != NULL) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s is supposed to be empty\n", + node->name); + } + if (type->attributes != NULL) { + xmlSchemaValidateAttributes(ctxt, node, type->attributes); + } + subtype = type->subtypes; + while (subtype != NULL) { + ctxt->type = subtype; + xmlSchemaValidateComplexType(ctxt, node); + subtype = subtype->next; + } + break; + case XML_SCHEMA_CONTENT_ELEMENTS: + case XML_SCHEMA_CONTENT_MIXED: + case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS: + /* + * Skip ignorable nodes in that context + */ + child = xmlSchemaSkipIgnored(ctxt, type, child); + while (child != NULL) { + if (child->type == XML_ELEMENT_NODE) { + ret = xmlRegExecPushString(ctxt->regexp, + child->name, child); +#ifdef DEBUG_AUTOMATA + if (ret < 0) + xmlGenericError(xmlGenericErrorContext, + " --> %s Error\n", child->name); + else + xmlGenericError(xmlGenericErrorContext, + " --> %s\n", child->name); +#endif + } + child = child->next; + /* + * Skip ignorable nodes in that context + */ + child = xmlSchemaSkipIgnored(ctxt, type, child); + } + break; + case XML_SCHEMA_CONTENT_BASIC: { + if (type->subtypes != NULL) { + ctxt->type = type->subtypes; + xmlSchemaValidateComplexType(ctxt, node); + } + if (type->baseType != NULL) { + ctxt->type = type->baseType; + xmlSchemaValidateBasicType(ctxt, node); + } + if (type->attributes != NULL) { + xmlSchemaValidateAttributes(ctxt, node, type->attributes); + } + ctxt->type = type; + break; + } + default: + TODO + xmlGenericError(xmlGenericErrorContext, + "unimplemented content type %d\n", + type->contentType); + } + return(ctxt->err); +} + +/** + * xmlSchemaValidateContent: + * @ctxt: a schema validation context + * @elem: an element 
+ * @type: the type declaration + * + * Validate the content of an element against the type. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateContent(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type; + + child = ctxt->node; + type = ctxt->type; + + xmlSchemaValidateAttributes(ctxt, node, type->attributes); + + switch (type->type) { + case XML_SCHEMA_TYPE_ANY: + /* Any type will do it, fine */ + TODO /* handle recursivity */ + break; + case XML_SCHEMA_TYPE_COMPLEX: + xmlSchemaValidateComplexType(ctxt, node); + break; + case XML_SCHEMA_TYPE_ELEMENT: { + xmlSchemaElementPtr decl = (xmlSchemaElementPtr) type; + /* + * Handle element reference here + */ + if (decl->ref != NULL) { + if (decl->refDecl == NULL) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: element reference %s not resolved\n", + decl->ref); + return(-1); + } + ctxt->type = (xmlSchemaTypePtr) decl->refDecl; + decl = decl->refDecl; + } + xmlSchemaValidateElementType(ctxt, node); + ctxt->type = type; + break; + } + case XML_SCHEMA_TYPE_BASIC: + xmlSchemaValidateBasicType(ctxt, node); + break; + case XML_SCHEMA_TYPE_FACET: + TODO + break; + case XML_SCHEMA_TYPE_SIMPLE: + xmlSchemaValidateSimpleType(ctxt, node); + break; + case XML_SCHEMA_TYPE_SEQUENCE: + TODO + break; + case XML_SCHEMA_TYPE_CHOICE: + TODO + break; + case XML_SCHEMA_TYPE_ALL: + TODO + break; + case XML_SCHEMA_TYPE_SIMPLE_CONTENT: + TODO + break; + case XML_SCHEMA_TYPE_COMPLEX_CONTENT: + TODO + break; + case XML_SCHEMA_TYPE_UR: + TODO + break; + case XML_SCHEMA_TYPE_RESTRICTION: + /*xmlSchemaValidateRestrictionType(ctxt, node); */ + TODO + break; + case XML_SCHEMA_TYPE_EXTENSION: + TODO + break; + case XML_SCHEMA_TYPE_ATTRIBUTE: + TODO + break; + case XML_SCHEMA_TYPE_GROUP: + TODO + break; + case 
XML_SCHEMA_TYPE_NOTATION: + TODO + break; + case XML_SCHEMA_TYPE_LIST: + TODO + break; + case XML_SCHEMA_TYPE_UNION: + TODO + break; + case XML_SCHEMA_FACET_MININCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_MINEXCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_MAXINCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_MAXEXCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_TOTALDIGITS: + TODO + break; + case XML_SCHEMA_FACET_FRACTIONDIGITS: + TODO + break; + case XML_SCHEMA_FACET_PATTERN: + TODO + break; + case XML_SCHEMA_FACET_ENUMERATION: + TODO + break; + case XML_SCHEMA_FACET_WHITESPACE: + TODO + break; + case XML_SCHEMA_FACET_LENGTH: + TODO + break; + case XML_SCHEMA_FACET_MAXLENGTH: + TODO + break; + case XML_SCHEMA_FACET_MINLENGTH: + TODO + break; + case XML_SCHEMA_TYPE_ATTRIBUTEGROUP: + TODO + break; + } + xmlSchemaValidateAttributes(ctxt, node, type->attributes); + + if (ctxt->node == NULL) + return(ctxt->err); + ctxt->node = ctxt->node->next; + ctxt->type = type->next; + return(ctxt->err); +} + +/** + * xmlSchemaValidateType: + * @ctxt: a schema validation context + * @elem: an element + * @type: the list of type declarations + * + * Validate the content of an element against the types. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem, + xmlSchemaElementPtr elemDecl, + xmlSchemaTypePtr type) { + xmlChar *nil; + + if ((elem->content == NULL) || (type == NULL) || (elemDecl == NULL)) + return(0); + /* + * 3.3.4 : 2 + */ + if (elemDecl->flags & XML_SCHEMAS_ELEM_ABSTRACT) { + ctxt->err = XML_SCHEMAS_ERR_ISABSTRACT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s is abstract\n", elem->name); + return(ctxt->err); + } + /* + * 3.3.4: 3 + */ + nil = xmlGetNsProp(elem, BAD_CAST "nil", xmlSchemaInstanceNs); + if (elemDecl->flags & XML_SCHEMAS_ELEM_NILLABLE) { + /* 3.3.4: 3.2 */ + if (xmlStrEqual(nil, BAD_CAST "true")) { + if (elem->children != NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOTEMPTY; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s is not empty\n", + elem->name); + return(ctxt->err); + } + if ((elemDecl->flags & XML_SCHEMAS_ELEM_FIXED) && + (elemDecl->value != NULL)) { + ctxt->err = XML_SCHEMAS_ERR_HAVEDEFAULT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Empty element %s cannot get a fixed value\n", + elem->name); + return(ctxt->err); + } + } + } else { + /* 3.3.4: 3.1 */ + if (nil != NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOTNILLABLE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s with xs:nil but not nillable\n", + elem->name); + xmlFree(nil); + return(ctxt->err); + } + } + + /* TODO 3.3.4: 4 if the element carries xs:type*/ + + ctxt->type = elemDecl->subtypes; + ctxt->node = elem->children; + xmlSchemaValidateContent(ctxt, elem); + xmlSchemaValidateAttributes(ctxt, elem, elemDecl->attributes); + + return(ctxt->err); +} + + +/** + * xmlSchemaValidateAttributes: + * @ctxt: a schema validation context + * @elem: an element + * @attributes: the list of attribute declarations + * + * Validate the attributes of an element. 
+ * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateAttributes(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem, + xmlSchemaAttributePtr attributes) { + int i, ret; + xmlAttrPtr attr; + xmlChar *value; + xmlSchemaAttributeGroupPtr group = NULL; + + if (attributes == NULL) + return(0); + while (attributes != NULL) { + /* + * Handle attribute groups + */ + if (attributes->type == XML_SCHEMA_TYPE_ATTRIBUTEGROUP) { + group = (xmlSchemaAttributeGroupPtr) attributes; + xmlSchemaValidateAttributes(ctxt, elem, group->attributes); + attributes = group->next; + continue; + } + for (i = ctxt->attrBase;i < ctxt->attrNr;i++) { + attr = ctxt->attr[i].attr; + if (attr == NULL) + continue; + if (!xmlStrEqual(attr->name, attributes->name)) + continue; + /* + * TODO: handle the mess about namespaces here. + */ + if ((attr->ns != NULL) /* || (attributes->ns != NULL) */) { + TODO + } + if (attributes->subtypes == NULL) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: attribute %s type not resolved\n", + attr->name); + continue; + } + value = xmlNodeListGetString(elem->doc, attr->children, 1); + ret = xmlSchemaValidateSimpleValue(ctxt, attributes->subtypes, + value); + if (ret != 0) { + ctxt->err = XML_SCHEMAS_ERR_ATTRINVALID; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "attribute %s on %s does not match type\n", + attr->name, elem->name); + } else { + ctxt->attr[i].state = XML_SCHEMAS_ATTR_CHECKED; + } + if (value != NULL) { + xmlFree(value); + } + } + attributes = attributes->next; + } + return(ctxt->err); +} + +/** + * xmlSchemaValidateElement: + * @ctxt: a schema validation context + * @elem: an element + * + * Validate an element in a tree + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateElement(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem) { + xmlSchemaElementPtr elemDecl; + int ret, attrBase; + + if (elem->ns != NULL) + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + elem->name, elem->ns->href, NULL); + else + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + elem->name, NULL, NULL); + /* + * 3.3.4 : 1 + */ + if (elemDecl == NULL) { + ctxt->err = XML_SCHEMAS_ERR_UNDECLAREDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s not declared\n", + elem->name); + return(ctxt->err); + } + if (elemDecl->subtypes == NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOTYPE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s has no type\n", + elem->name); + return(ctxt->err); + } + /* + * Verify the attributes + */ + attrBase = ctxt->attrBase; + ctxt->attrBase = ctxt->attrNr; + xmlSchemaRegisterAttributes(ctxt, elem->properties); + xmlSchemaValidateAttributes(ctxt, elem, elemDecl->attributes); + /* + * Verify the element content recursively + */ + if (elemDecl->contModel != NULL) { + ctxt->regexp = xmlRegNewExecCtxt(elemDecl->contModel, + (xmlRegExecCallbacks) xmlSchemaValidateCallback, + ctxt); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s\n", elem->name); +#endif + } + xmlSchemaValidateType(ctxt, elem, elemDecl, elemDecl->subtypes); + if (elemDecl->contModel != NULL) { + ret = xmlRegExecPushString(ctxt->regexp, NULL, NULL); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s : %d\n", elem->name, ret); +#endif + if (ret == 0) { + ctxt->err = XML_SCHEMAS_ERR_ELEMCONT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failed\n", + elem->name); + } else if (ret < 0) { + ctxt->err = XML_SCHEMAS_ERR_ELEMCONT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failed\n", + elem->name); +#ifdef DEBUG_CONTENT + } else { + xmlGenericError(xmlGenericErrorContext, + "Element %s 
content check succeeded\n", elem->name); + +#endif + } + xmlRegFreeExecCtxt(ctxt->regexp); + } + /* + * Verify that all attributes were Schemas-validated + */ + xmlSchemaCheckAttributes(ctxt, elem); + ctxt->attrNr = ctxt->attrBase; + ctxt->attrBase = attrBase; + + return(ctxt->err); +} + +/** + * xmlSchemaValidateDocument: + * @ctxt: a schema validation context + * @doc: a parsed document tree + * + * Validate a document tree in memory. + * + * Returns 0 if the document is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateDocument(xmlSchemaValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlNodePtr root; + xmlSchemaElementPtr elemDecl; + + root = xmlDocGetRootElement(doc); + if (root == NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOROOT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "document has no root\n"); + return(ctxt->err); + } + if (root->ns != NULL) + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + root->name, root->ns->href, NULL); + else + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + root->name, NULL, NULL); + if (elemDecl == NULL) { + ctxt->err = XML_SCHEMAS_ERR_UNDECLAREDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s not declared\n", + root->name); + } else if ((elemDecl->flags & XML_SCHEMAS_ELEM_TOPLEVEL) == 0) { + ctxt->err = XML_SCHEMAS_ERR_NOTTOPLEVEL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Root element %s not toplevel\n", + root->name); + } + /* + * Okay, start the recursive validation + */ + xmlSchemaValidateElement(ctxt, root); + + return(ctxt->err); +} + +/************************************************************************ + * * + * SAX Validation code * + * * + ************************************************************************/ + +/************************************************************************ + * * + * Validation interfaces * + * * + 
************************************************************************/ + +/** + * xmlSchemaNewValidCtxt: + * @schema: a precompiled XML Schemas + * + * Create an XML Schemas validation context based on the given schema + * + * Returns the validation context or NULL in case of error + */ +xmlSchemaValidCtxtPtr +xmlSchemaNewValidCtxt(xmlSchemaPtr schema) { + xmlSchemaValidCtxtPtr ret; + + ret = (xmlSchemaValidCtxtPtr) xmlMalloc(sizeof(xmlSchemaValidCtxt)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to allocate new schama validation context\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaValidCtxt)); + ret->schema = schema; + ret->attrNr = 0; + ret->attrMax = 10; + ret->attr = (xmlSchemaAttrStatePtr) xmlMalloc(ret->attrMax * + sizeof(xmlSchemaAttrState)); + if (ret->attr == NULL) { + free(ret); + return(NULL); + } + memset(ret->attr, 0, ret->attrMax * sizeof(xmlSchemaAttrState)); + return (ret); +} + +/** + * xmlSchemaFreeValidCtxt: + * @ctxt: the schema validation context + * + * Free the resources associated to the schema validation context + */ +void +xmlSchemaFreeValidCtxt(xmlSchemaValidCtxtPtr ctxt) { + if (ctxt == NULL) + return; + if (ctxt->attr != NULL) + xmlFree(ctxt->attr); + if (ctxt->value != NULL) + xmlSchemaFreeValue(ctxt->value); + xmlFree(ctxt); +} + +/** + * xmlSchemaSetValidErrors: + * @ctxt: a schema validation context + * @err: the error function + * @warn: the warning function + * @ctx: the functions context + * + * Set the error and warning callback informations + */ +void +xmlSchemaSetValidErrors(xmlSchemaValidCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, void *ctx) { + if (ctxt == NULL) + return; + ctxt->error = err; + ctxt->warning = warn; + ctxt->userData = ctx; +} + +/** + * xmlSchemaValidateDoc: + * @ctxt: a schema validation context + * @doc: a parsed document tree + * + * Validate a document tree in memory. 
+ * + * Returns 0 if the document is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +int +xmlSchemaValidateDoc(xmlSchemaValidCtxtPtr ctxt, xmlDocPtr doc) { + int ret; + + if ((ctxt == NULL) || (doc == NULL)) + return(-1); + + ctxt->doc = doc; + ret = xmlSchemaValidateDocument(ctxt, doc); + return(ret); +} + +/** + * xmlSchemaValidateStream: + * @ctxt: a schema validation context + * @input: the input to use for reading the data + * @enc: an optional encoding information + * @sax: a SAX handler for the resulting events + * @user_data: the context to provide to the SAX handler. + * + * Validate a document tree in memory. + * + * Returns 0 if the document is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +int +xmlSchemaValidateStream(xmlSchemaValidCtxtPtr ctxt, + xmlParserInputBufferPtr input, xmlCharEncoding enc, + xmlSAXHandlerPtr sax, void *user_data) { + if ((ctxt == NULL) || (input == NULL)) + return(-1); + ctxt->input = input; + ctxt->enc = enc; + ctxt->sax = sax; + ctxt->user_data = user_data; + TODO + return(0); +} + +#endif /* LIBXML_SCHEMAS_ENABLED */ diff --git a/bundle/libxml/xmlschemastypes.c b/bundle/libxml/xmlschemastypes.c new file mode 100644 index 0000000000..1be67c8622 --- /dev/null +++ b/bundle/libxml/xmlschemastypes.c @@ -0,0 +1,1880 @@ +/* + * schemastypes.c : implementation of the XML Schema Datatypes + * definition and validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard <veillard@redhat.com> + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/hash.h> +#include <libxml/valid.h> + +#include <libxml/xmlschemas.h> +#include <libxml/schemasInternals.h> +#include <libxml/xmlschemastypes.h> + +#ifdef HAVE_MATH_H +#include <math.h> +#endif + +#define DEBUG + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define XML_SCHEMAS_NAMESPACE_NAME \ + (const xmlChar *)"http://www.w3.org/2001/XMLSchema" + +typedef enum { + XML_SCHEMAS_UNKNOWN = 0, + XML_SCHEMAS_STRING, + XML_SCHEMAS_NMTOKEN, + XML_SCHEMAS_DECIMAL, + XML_SCHEMAS_TIME, + XML_SCHEMAS_GDAY, + XML_SCHEMAS_GMONTH, + XML_SCHEMAS_GMONTHDAY, + XML_SCHEMAS_GYEAR, + XML_SCHEMAS_GYEARMONTH, + XML_SCHEMAS_DATE, + XML_SCHEMAS_DATETIME, + XML_SCHEMAS_DURATION, + XML_SCHEMAS_FLOAT, + XML_SCHEMAS_DOUBLE, + XML_SCHEMAS_, + XML_SCHEMAS_XXX +} xmlSchemaValType; + +unsigned long powten[10] = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000L, + 100000000L, 1000000000L +}; + +/* Date value */ +typedef struct _xmlSchemaValDate xmlSchemaValDate; +typedef xmlSchemaValDate *xmlSchemaValDatePtr; +struct _xmlSchemaValDate { + long year; + unsigned int mon :4; /* 1 <= mon <= 12 */ + unsigned int day :5; /* 1 <= day <= 31 */ + unsigned int hour :5; /* 0 <= hour <= 23 */ + unsigned int min :6; /* 0 <= min <= 59 */ + double sec; + int tz_flag :1; /* is tzo explicitely set? 
*/ + int tzo :11; /* -1440 <= tzo <= 1440 */ +}; + +/* Duration value */ +typedef struct _xmlSchemaValDuration xmlSchemaValDuration; +typedef xmlSchemaValDuration *xmlSchemaValDurationPtr; +struct _xmlSchemaValDuration { + long mon; /* mon stores years also */ + long day; + double sec; /* sec stores min and hour also */ +}; + +typedef struct _xmlSchemaValDecimal xmlSchemaValDecimal; +typedef xmlSchemaValDecimal *xmlSchemaValDecimalPtr; +struct _xmlSchemaValDecimal { + /* would use long long but not portable */ + unsigned long base; + unsigned int extra; + unsigned int sign:1; + int frac:7; + int total:8; +}; + +struct _xmlSchemaVal { + xmlSchemaValType type; + union { + xmlSchemaValDecimal decimal; + xmlSchemaValDate date; + xmlSchemaValDuration dur; + float f; + double d; + } value; +}; + +static int xmlSchemaTypesInitialized = 0; +static xmlHashTablePtr xmlSchemaTypesBank = NULL; + +static xmlSchemaTypePtr xmlSchemaTypeStringDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeAnyTypeDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeAnySimpleTypeDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDecimalDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDatetimeDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDateDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeTimeDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeGYearDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeGYearMonthDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeGDayDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeGMonthDayDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeGMonthDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDurationDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypePositiveIntegerDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeNonNegativeIntegerDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeNmtoken = NULL; +static xmlSchemaTypePtr xmlSchemaTypeFloatDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDoubleDef = NULL; + +/* + * xmlSchemaInitBasicType: + * @name: the type name + * 
+ * Initialize one default type + */ +static xmlSchemaTypePtr +xmlSchemaInitBasicType(const char *name) { + xmlSchemaTypePtr ret; + + ret = (xmlSchemaTypePtr) xmlMalloc(sizeof(xmlSchemaType)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Could not initilize type %s: out of memory\n", name); + return(NULL); + } + memset(ret, 0, sizeof(xmlSchemaType)); + ret->name = xmlStrdup((const xmlChar *)name); + ret->type = XML_SCHEMA_TYPE_BASIC; + ret->contentType = XML_SCHEMA_CONTENT_BASIC; + xmlHashAddEntry2(xmlSchemaTypesBank, ret->name, + XML_SCHEMAS_NAMESPACE_NAME, ret); + return(ret); +} + +/* + * xmlSchemaInitTypes: + * + * Initialize the default XML Schemas type library + */ +void +xmlSchemaInitTypes(void) { + if (xmlSchemaTypesInitialized != 0) + return; + xmlSchemaTypesBank = xmlHashCreate(40); + + xmlSchemaTypeStringDef = xmlSchemaInitBasicType("string"); + xmlSchemaTypeAnyTypeDef = xmlSchemaInitBasicType("anyType"); + xmlSchemaTypeAnySimpleTypeDef = xmlSchemaInitBasicType("anySimpleType"); + xmlSchemaTypeDecimalDef = xmlSchemaInitBasicType("decimal"); + xmlSchemaTypeDateDef = xmlSchemaInitBasicType("date"); + xmlSchemaTypeDatetimeDef = xmlSchemaInitBasicType("dateTime"); + xmlSchemaTypeTimeDef = xmlSchemaInitBasicType("time"); + xmlSchemaTypeGYearDef = xmlSchemaInitBasicType("gYear"); + xmlSchemaTypeGYearMonthDef = xmlSchemaInitBasicType("gYearMonth"); + xmlSchemaTypeGMonthDef = xmlSchemaInitBasicType("gMonth"); + xmlSchemaTypeGMonthDayDef = xmlSchemaInitBasicType("gMonthDay"); + xmlSchemaTypeGDayDef = xmlSchemaInitBasicType("gDay"); + xmlSchemaTypeDurationDef = xmlSchemaInitBasicType("duration"); + xmlSchemaTypePositiveIntegerDef = xmlSchemaInitBasicType("positiveInteger"); + xmlSchemaTypeNonNegativeIntegerDef = + xmlSchemaInitBasicType("nonNegativeInteger"); + xmlSchemaTypeNmtoken = xmlSchemaInitBasicType("NMTOKEN"); + xmlSchemaTypeFloatDef = xmlSchemaInitBasicType("float"); + xmlSchemaTypeDoubleDef = xmlSchemaInitBasicType("double"); + + 
xmlSchemaTypesInitialized = 1; +} + +/** + * xmlSchemaCleanupTypes: + * + * Cleanup the default XML Schemas type library + */ +void +xmlSchemaCleanupTypes(void) { + if (xmlSchemaTypesInitialized == 0) + return; + xmlHashFree(xmlSchemaTypesBank, (xmlHashDeallocator) xmlSchemaFreeType); + xmlSchemaTypesInitialized = 0; +} + +/** + * xmlSchemaNewValue: + * @type: the value type + * + * Allocate a new simple type value + * + * Returns a pointer to the new value or NULL in case of error + */ +static xmlSchemaValPtr +xmlSchemaNewValue(xmlSchemaValType type) { + xmlSchemaValPtr value; + + value = (xmlSchemaValPtr) xmlMalloc(sizeof(xmlSchemaVal)); + if (value == NULL) { + return(NULL); + } + memset(value, 0, sizeof(xmlSchemaVal)); + value->type = type; + return(value); +} + +/** + * xmlSchemaFreeValue: + * @value: the value to free + * + * Cleanup the default XML Schemas type library + */ +void +xmlSchemaFreeValue(xmlSchemaValPtr value) { + if (value == NULL) + return; + xmlFree(value); +} + +/** + * xmlSchemaGetPredefinedType: + * @name: the type name + * @ns: the URI of the namespace usually "http://www.w3.org/2001/XMLSchema" + * + * Lookup a type in the default XML Schemas type library + * + * Returns the type if found, NULL otherwise + */ +xmlSchemaTypePtr +xmlSchemaGetPredefinedType(const xmlChar *name, const xmlChar *ns) { + if (xmlSchemaTypesInitialized == 0) + xmlSchemaInitTypes(); + if (name == NULL) + return(NULL); + return((xmlSchemaTypePtr) xmlHashLookup2(xmlSchemaTypesBank, name, ns)); +} + +/**************************************************************** + * * + * Convenience macros and functions * + * * + ****************************************************************/ + +#define IS_TZO_CHAR(c) \ + ((c == 0) || (c == 'Z') || (c == '+') || (c == '-')) + +#define VALID_YEAR(yr) (yr != 0) +#define VALID_MONTH(mon) ((mon >= 1) && (mon <= 12)) +/* VALID_DAY should only be used when month is unknown */ +#define VALID_DAY(day) ((day >= 1) && (day <= 31)) +#define 
VALID_HOUR(hr) ((hr >= 0) && (hr <= 23)) +#define VALID_MIN(min) ((min >= 0) && (min <= 59)) +#define VALID_SEC(sec) ((sec >= 0) && (sec < 60)) +#define VALID_TZO(tzo) ((tzo > -1440) && (tzo < 1440)) +#define IS_LEAP(y) \ + (((y % 4 == 0) && (y % 100 != 0)) || (y % 400 == 0)) + +static const long daysInMonth[12] = + { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; +static const long daysInMonthLeap[12] = + { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + +#define MAX_DAYINMONTH(yr,mon) \ + (IS_LEAP(yr) ? daysInMonthLeap[mon - 1] : daysInMonth[mon - 1]) + +#define VALID_MDAY(dt) \ + (IS_LEAP(dt->year) ? \ + (dt->day <= daysInMonthLeap[dt->mon - 1]) : \ + (dt->day <= daysInMonth[dt->mon - 1])) + +#define VALID_DATE(dt) \ + (VALID_YEAR(dt->year) && VALID_MONTH(dt->mon) && VALID_MDAY(dt)) + +#define VALID_TIME(dt) \ + (VALID_HOUR(dt->hour) && VALID_MIN(dt->min) && \ + VALID_SEC(dt->sec) && VALID_TZO(dt->tzo)) + +#define VALID_DATETIME(dt) \ + (VALID_DATE(dt) && VALID_TIME(dt)) + +#define SECS_PER_MIN (60) +#define SECS_PER_HOUR (60 * SECS_PER_MIN) +#define SECS_PER_DAY (24 * SECS_PER_HOUR) + +static const long dayInYearByMonth[12] = + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; +static const long dayInLeapYearByMonth[12] = + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }; + +#define DAY_IN_YEAR(day, month, year) \ + ((IS_LEAP(year) ? 
\ + dayInLeapYearByMonth[month - 1] : \ + dayInYearByMonth[month - 1]) + day) + +#ifdef DEBUG +#define DEBUG_DATE(dt) \ + xmlGenericError(xmlGenericErrorContext, \ + "type=%o %04ld-%02u-%02uT%02u:%02u:%03f", \ + dt->type,dt->value.date.year,dt->value.date.mon, \ + dt->value.date.day,dt->value.date.hour,dt->value.date.min, \ + dt->value.date.sec); \ + if (dt->value.date.tz_flag) \ + if (dt->value.date.tzo != 0) \ + xmlGenericError(xmlGenericErrorContext, \ + "%+05d\n",dt->value.date.tzo); \ + else \ + xmlGenericError(xmlGenericErrorContext, "Z\n"); \ + else \ + xmlGenericError(xmlGenericErrorContext,"\n") +#else +#define DEBUG_DATE(dt) +#endif + +/** + * _xmlSchemaParseGYear: + * @dt: pointer to a date structure + * @str: pointer to the string to analyze + * + * Parses a xs:gYear without time zone and fills in the appropriate + * field of the @dt structure. @str is updated to point just after the + * xs:gYear. It is supposed that @dt->year is big enough to contain + * the year. + * + * Returns 0 or the error code + */ +static int +_xmlSchemaParseGYear (xmlSchemaValDatePtr dt, const xmlChar **str) { + const xmlChar *cur = *str, *firstChar; + int isneg = 0, digcnt = 0; + + if (((*cur < '0') || (*cur > '9')) && + (*cur != '-') && (*cur != '+')) + return -1; + + if (*cur == '-') { + isneg = 1; + cur++; + } + + firstChar = cur; + + while ((*cur >= '0') && (*cur <= '9')) { + dt->year = dt->year * 10 + (*cur - '0'); + cur++; + digcnt++; + } + + /* year must be at least 4 digits (CCYY); over 4 + * digits cannot have a leading zero. */ + if ((digcnt < 4) || ((digcnt > 4) && (*firstChar == '0'))) + return 1; + + if (isneg) + dt->year = - dt->year; + + if (!VALID_YEAR(dt->year)) + return 2; + + *str = cur; + return 0; +} + +/** + * PARSE_2_DIGITS: + * @num: the integer to fill in + * @cur: an #xmlChar * + * @invalid: an integer + * + * Parses a 2-digits integer and updates @num with the value. @cur is + * updated to point just after the integer. 
+ * In case of error, @invalid is set to %TRUE, values of @num and + * @cur are undefined. + */ +#define PARSE_2_DIGITS(num, cur, invalid) \ + if ((cur[0] < '0') || (cur[0] > '9') || \ + (cur[1] < '0') || (cur[1] > '9')) \ + invalid = 1; \ + else \ + num = (cur[0] - '0') * 10 + (cur[1] - '0'); \ + cur += 2; + +/** + * PARSE_FLOAT: + * @num: the double to fill in + * @cur: an #xmlChar * + * @invalid: an integer + * + * Parses a float and updates @num with the value. @cur is + * updated to point just after the float. The float must have a + * 2-digits integer part and may or may not have a decimal part. + * In case of error, @invalid is set to %TRUE, values of @num and + * @cur are undefined. + */ +#define PARSE_FLOAT(num, cur, invalid) \ + PARSE_2_DIGITS(num, cur, invalid); \ + if (!invalid && (*cur == '.')) { \ + double mult = 1; \ + cur++; \ + if ((*cur < '0') || (*cur > '9')) \ + invalid = 1; \ + while ((*cur >= '0') && (*cur <= '9')) { \ + mult /= 10; \ + num += (*cur - '0') * mult; \ + cur++; \ + } \ + } + +/** + * _xmlSchemaParseGMonth: + * @dt: pointer to a date structure + * @str: pointer to the string to analyze + * + * Parses a xs:gMonth without time zone and fills in the appropriate + * field of the @dt structure. @str is updated to point just after the + * xs:gMonth. + * + * Returns 0 or the error code + */ +static int +_xmlSchemaParseGMonth (xmlSchemaValDatePtr dt, const xmlChar **str) { + const xmlChar *cur = *str; + int ret = 0; + + PARSE_2_DIGITS(dt->mon, cur, ret); + if (ret != 0) + return ret; + + if (!VALID_MONTH(dt->mon)) + return 2; + + *str = cur; + return 0; +} + +/** + * _xmlSchemaParseGDay: + * @dt: pointer to a date structure + * @str: pointer to the string to analyze + * + * Parses a xs:gDay without time zone and fills in the appropriate + * field of the @dt structure. @str is updated to point just after the + * xs:gDay. 
+ * + * Returns 0 or the error code + */ +static int +_xmlSchemaParseGDay (xmlSchemaValDatePtr dt, const xmlChar **str) { + const xmlChar *cur = *str; + int ret = 0; + + PARSE_2_DIGITS(dt->day, cur, ret); + if (ret != 0) + return ret; + + if (!VALID_DAY(dt->day)) + return 2; + + *str = cur; + return 0; +} + +/** + * _xmlSchemaParseTime: + * @dt: pointer to a date structure + * @str: pointer to the string to analyze + * + * Parses a xs:time without time zone and fills in the appropriate + * fields of the @dt structure. @str is updated to point just after the + * xs:time. + * In case of error, values of @dt fields are undefined. + * + * Returns 0 or the error code + */ +static int +_xmlSchemaParseTime (xmlSchemaValDatePtr dt, const xmlChar **str) { + const xmlChar *cur = *str; + unsigned int hour = 0; /* use temp var in case str is not xs:time */ + int ret = 0; + + PARSE_2_DIGITS(hour, cur, ret); + if (ret != 0) + return ret; + + if (*cur != ':') + return 1; + cur++; + + /* the ':' insures this string is xs:time */ + dt->hour = hour; + + PARSE_2_DIGITS(dt->min, cur, ret); + if (ret != 0) + return ret; + + if (*cur != ':') + return 1; + cur++; + + PARSE_FLOAT(dt->sec, cur, ret); + if (ret != 0) + return ret; + + if (!VALID_TIME(dt)) + return 2; + + *str = cur; + return 0; +} + +/** + * _xmlSchemaParseTimeZone: + * @dt: pointer to a date structure + * @str: pointer to the string to analyze + * + * Parses a time zone without time zone and fills in the appropriate + * field of the @dt structure. @str is updated to point just after the + * time zone. 
+ * + * Returns 0 or the error code + */ +static int +_xmlSchemaParseTimeZone (xmlSchemaValDatePtr dt, const xmlChar **str) { + const xmlChar *cur = *str; + int ret = 0; + + if (str == NULL) + return -1; + + switch (*cur) { + case 0: + dt->tz_flag = 0; + dt->tzo = 0; + break; + + case 'Z': + dt->tz_flag = 1; + dt->tzo = 0; + cur++; + break; + + case '+': + case '-': { + int isneg = 0, tmp = 0; + isneg = (*cur == '-'); + + cur++; + + PARSE_2_DIGITS(tmp, cur, ret); + if (ret != 0) + return ret; + if (!VALID_HOUR(tmp)) + return 2; + + if (*cur != ':') + return 1; + cur++; + + dt->tzo = tmp * 60; + + PARSE_2_DIGITS(tmp, cur, ret); + if (ret != 0) + return ret; + if (!VALID_MIN(tmp)) + return 2; + + dt->tzo += tmp; + if (isneg) + dt->tzo = - dt->tzo; + + if (!VALID_TZO(dt->tzo)) + return 2; + + dt->tz_flag = 1; + break; + } + default: + return 1; + } + + *str = cur; + return 0; +} + +/**************************************************************** + * * + * XML Schema Dates/Times Datatypes Handling * + * * + ****************************************************************/ + +/** + * PARSE_DIGITS: + * @num: the integer to fill in + * @cur: an #xmlChar * + * @num_type: an integer flag + * + * Parses a digits integer and updates @num with the value. @cur is + * updated to point just after the integer. + * In case of error, @num_type is set to -1, values of @num and + * @cur are undefined. + */ +#define PARSE_DIGITS(num, cur, num_type) \ + if ((*cur < '0') || (*cur > '9')) \ + num_type = -1; \ + else \ + while ((*cur >= '0') && (*cur <= '9')) { \ + num = num * 10 + (*cur - '0'); \ + cur++; \ + } + +/** + * PARSE_NUM: + * @num: the double to fill in + * @cur: an #xmlChar * + * @num_type: an integer flag + * + * Parses a float or integer and updates @num with the value. @cur is + * updated to point just after the number. If the number is a float, + * then it must have an integer part and a decimal part; @num_type will + * be set to 1. 
If there is no decimal part, @num_type is set to zero. + * In case of error, @num_type is set to -1, values of @num and + * @cur are undefined. + */ +#define PARSE_NUM(num, cur, num_type) \ + num = 0; \ + PARSE_DIGITS(num, cur, num_type); \ + if (!num_type && (*cur == '.')) { \ + double mult = 1; \ + cur++; \ + if ((*cur < '0') || (*cur > '9')) \ + num_type = -1; \ + else \ + num_type = 1; \ + while ((*cur >= '0') && (*cur <= '9')) { \ + mult /= 10; \ + num += (*cur - '0') * mult; \ + cur++; \ + } \ + } + +/** + * xmlSchemaValidateDates: + * @type: the predefined type + * @dateTime: string to analyze + * @val: the return computed value + * + * Check that @dateTime conforms to the lexical space of one of the date types. + * if true a value is computed and returned in @val. + * + * Returns 0 if this validates, a positive error code number otherwise + * and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateDates (xmlSchemaTypePtr type ATTRIBUTE_UNUSED, + const xmlChar *dateTime, xmlSchemaValPtr *val) { + xmlSchemaValPtr dt; + int ret; + const xmlChar *cur = dateTime; + +#define RETURN_TYPE_IF_VALID(t) \ + if (IS_TZO_CHAR(*cur)) { \ + ret = _xmlSchemaParseTimeZone(&(dt->value.date), &cur); \ + if (ret == 0) { \ + if (*cur != 0) \ + goto error; \ + dt->type = t; \ + if (val != NULL) \ + *val = dt; \ + return 0; \ + } \ + } + + if (dateTime == NULL) + return -1; + + if ((*cur != '-') && (*cur < '0') && (*cur > '9')) + return 1; + + dt = xmlSchemaNewValue(XML_SCHEMAS_UNKNOWN); + if (dt == NULL) + return -1; + + if ((cur[0] == '-') && (cur[1] == '-')) { + /* + * It's an incomplete date (xs:gMonthDay, xs:gMonth or + * xs:gDay) + */ + cur += 2; + + /* is it an xs:gDay? 
*/ + if (*cur == '-') { + ++cur; + ret = _xmlSchemaParseGDay(&(dt->value.date), &cur); + if (ret != 0) + goto error; + + RETURN_TYPE_IF_VALID(XML_SCHEMAS_GDAY); + + goto error; + } + + /* + * it should be an xs:gMonthDay or xs:gMonth + */ + ret = _xmlSchemaParseGMonth(&(dt->value.date), &cur); + if (ret != 0) + goto error; + + if (*cur != '-') + goto error; + cur++; + + /* is it an xs:gMonth? */ + if (*cur == '-') { + cur++; + RETURN_TYPE_IF_VALID(XML_SCHEMAS_GMONTH); + goto error; + } + + /* it should be an xs:gMonthDay */ + ret = _xmlSchemaParseGDay(&(dt->value.date), &cur); + if (ret != 0) + goto error; + + RETURN_TYPE_IF_VALID(XML_SCHEMAS_GMONTHDAY); + + goto error; + } + + /* + * It's a right-truncated date or an xs:time. + * Try to parse an xs:time then fallback on right-truncated dates. + */ + if ((*cur >= '0') && (*cur <= '9')) { + ret = _xmlSchemaParseTime(&(dt->value.date), &cur); + if (ret == 0) { + /* it's an xs:time */ + RETURN_TYPE_IF_VALID(XML_SCHEMAS_TIME); + } + } + + /* fallback on date parsing */ + cur = dateTime; + + ret = _xmlSchemaParseGYear(&(dt->value.date), &cur); + if (ret != 0) + goto error; + + /* is it an xs:gYear? */ + RETURN_TYPE_IF_VALID(XML_SCHEMAS_GYEAR); + + if (*cur != '-') + goto error; + cur++; + + ret = _xmlSchemaParseGMonth(&(dt->value.date), &cur); + if (ret != 0) + goto error; + + /* is it an xs:gYearMonth? */ + RETURN_TYPE_IF_VALID(XML_SCHEMAS_GYEARMONTH); + + if (*cur != '-') + goto error; + cur++; + + ret = _xmlSchemaParseGDay(&(dt->value.date), &cur); + if ((ret != 0) || !VALID_DATE((&(dt->value.date)))) + goto error; + + /* is it an xs:date? 
*/ + RETURN_TYPE_IF_VALID(XML_SCHEMAS_DATE); + + if (*cur != 'T') + goto error; + cur++; + + /* it should be an xs:dateTime */ + ret = _xmlSchemaParseTime(&(dt->value.date), &cur); + if (ret != 0) + goto error; + + ret = _xmlSchemaParseTimeZone(&(dt->value.date), &cur); + if ((ret != 0) || (*cur != 0) || !VALID_DATETIME((&(dt->value.date)))) + goto error; + + dt->type = XML_SCHEMAS_DATETIME; + + if (val != NULL) + *val = dt; + + return 0; + +error: + if (dt != NULL) + xmlSchemaFreeValue(dt); + return 1; +} + +/** + * xmlSchemaValidateDuration: + * @type: the predefined type + * @duration: string to analyze + * @val: the return computed value + * + * Check that @duration conforms to the lexical space of the duration type. + * if true a value is computed and returned in @val. + * + * Returns 0 if this validates, a positive error code number otherwise + * and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateDuration (xmlSchemaTypePtr type ATTRIBUTE_UNUSED, + const xmlChar *duration, xmlSchemaValPtr *val) { + const xmlChar *cur = duration; + xmlSchemaValPtr dur; + int isneg = 0; + unsigned int seq = 0; + + if (duration == NULL) + return -1; + + if (*cur == '-') { + isneg = 1; + cur++; + } + + /* duration must start with 'P' (after sign) */ + if (*cur++ != 'P') + return 1; + + dur = xmlSchemaNewValue(XML_SCHEMAS_DURATION); + if (dur == NULL) + return -1; + + while (*cur != 0) { + double num; + int num_type = 0; /* -1 = invalid, 0 = int, 1 = floating */ + const xmlChar desig[] = {'Y', 'M', 'D', 'H', 'M', 'S'}; + const double multi[] = { 0.0, 0.0, 86400.0, 3600.0, 60.0, 1.0, 0.0}; + + /* input string should be empty or invalid date/time item */ + if (seq >= sizeof(desig)) + goto error; + + /* T designator must be present for time items */ + if (*cur == 'T') { + if (seq <= 3) { + seq = 3; + cur++; + } else + return 1; + } else if (seq == 3) + goto error; + + /* parse the number portion of the item */ + PARSE_NUM(num, cur, num_type); + + if ((num_type 
== -1) || (*cur == 0)) + goto error; + + /* update duration based on item type */ + while (seq < sizeof(desig)) { + if (*cur == desig[seq]) { + + /* verify numeric type; only seconds can be float */ + if ((num_type != 0) && (seq < (sizeof(desig)-1))) + goto error; + + switch (seq) { + case 0: + dur->value.dur.mon = (long)num * 12; + break; + case 1: + dur->value.dur.mon += (long)num; + break; + default: + /* convert to seconds using multiplier */ + dur->value.dur.sec += num * multi[seq]; + seq++; + break; + } + + break; /* exit loop */ + } + /* no date designators found? */ + if (++seq == 3) + goto error; + } + cur++; + } + + if (isneg) { + dur->value.dur.mon = -dur->value.dur.mon; + dur->value.dur.day = -dur->value.dur.day; + dur->value.dur.sec = -dur->value.dur.sec; + } + + if (val != NULL) + *val = dur; + + return 0; + +error: + if (dur != NULL) + xmlSchemaFreeValue(dur); + return 1; +} + +/** + * xmlSchemaValidatePredefinedType: + * @type: the predefined type + * @value: the value to check + * @val: the return computed value + * + * Check that a value conforms to the lexical space of the predefined type. + * if true a value is computed and returned in @val. + * + * Returns 0 if this validates, a positive error code number otherwise + * and -1 in case of internal or API error. 
+ */ +int +xmlSchemaValidatePredefinedType(xmlSchemaTypePtr type, const xmlChar *value, + xmlSchemaValPtr *val) { + xmlSchemaValPtr v; + + if (xmlSchemaTypesInitialized == 0) + return(-1); + if (type == NULL) + return(-1); + + if (val != NULL) + *val = NULL; + if (type == xmlSchemaTypeStringDef) { + return(0); + } else if (type == xmlSchemaTypeAnyTypeDef) { + return(0); + } else if (type == xmlSchemaTypeAnySimpleTypeDef) { + return(0); + } else if (type == xmlSchemaTypeNmtoken) { + if (xmlValidateNmtokenValue(value)) + return(0); + return(1); + } else if (type == xmlSchemaTypeDecimalDef) { + const xmlChar *cur = value, *tmp; + int frac = 0, len, neg = 0; + unsigned long base = 0; + if (cur == NULL) + return(1); + if (*cur == '+') + cur++; + else if (*cur == '-') { + neg = 1; + cur++; + } + tmp = cur; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + cur++; + } + len = cur - tmp; + if (*cur == '.') { + cur++; + tmp = cur; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + cur++; + } + frac = cur - tmp; + } + if (*cur != 0) + return(1); + if (val != NULL) { + v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL); + if (v != NULL) { + v->value.decimal.base = base; + v->value.decimal.sign = neg; + v->value.decimal.frac = frac; + v->value.decimal.total = frac + len; + *val = v; + } + } + return(0); + } else if (type == xmlSchemaTypeDurationDef) { + return xmlSchemaValidateDuration(type, value, val); + } else if ((type == xmlSchemaTypeDatetimeDef) || + (type == xmlSchemaTypeTimeDef) || + (type == xmlSchemaTypeDateDef) || + (type == xmlSchemaTypeGYearDef) || + (type == xmlSchemaTypeGYearMonthDef) || + (type == xmlSchemaTypeGMonthDef) || + (type == xmlSchemaTypeGMonthDayDef) || + (type == xmlSchemaTypeGDayDef)) { + return xmlSchemaValidateDates(type, value, val); + } else if (type == xmlSchemaTypePositiveIntegerDef) { + const xmlChar *cur = value; + unsigned long base = 0; + int total = 0; + if (cur == NULL) + return(1); + if 
(*cur == '+') + cur++; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + total++; + cur++; + } + if (*cur != 0) + return(1); + if (val != NULL) { + v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL); + if (v != NULL) { + v->value.decimal.base = base; + v->value.decimal.sign = 0; + v->value.decimal.frac = 0; + v->value.decimal.total = total; + *val = v; + } + } + return(0); + } else if (type == xmlSchemaTypeNonNegativeIntegerDef) { + const xmlChar *cur = value; + unsigned long base = 0; + int total = 0; + int sign = 0; + if (cur == NULL) + return(1); + if (*cur == '-') { + sign = 1; + cur++; + } else if (*cur == '+') + cur++; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + total++; + cur++; + } + if (*cur != 0) + return(1); + if ((sign == 1) && (base != 0)) + return(1); + if (val != NULL) { + v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL); + if (v != NULL) { + v->value.decimal.base = base; + v->value.decimal.sign = 0; + v->value.decimal.frac = 0; + v->value.decimal.total = total; + *val = v; + } + } + return(0); + } else if (type == xmlSchemaTypeFloatDef) { + const xmlChar *cur = value, *tmp; + int frac = 0, len, neg = 0; + unsigned long base = 0; + if (cur == NULL) + return(1); + if (*cur == '+') + cur++; + else if (*cur == '-') { + neg = 1; + cur++; + } + tmp = cur; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + cur++; + } + len = cur - tmp; + if (*cur == '.') { + cur++; + tmp = cur; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + cur++; + } + frac = cur - tmp; + } + TODO + return(0); + } else if (type == xmlSchemaTypeDoubleDef) { + TODO + return(0); + } else { + TODO + return(0); + } +} + +/** + * xmlSchemaCompareDecimals: + * @x: a first decimal value + * @y: a second decimal value + * + * Compare 2 decimals + * + * Returns -1 if x < y, 0 if x == y, 1 if x > y and -2 in case of error + */ +static int +xmlSchemaCompareDecimals(xmlSchemaValPtr x, 
xmlSchemaValPtr y) +{ + xmlSchemaValPtr swp; + int order = 1; + unsigned long tmp; + + if ((x->value.decimal.sign) && (x->value.decimal.sign)) + order = -1; + else if (x->value.decimal.sign) + return (-1); + else if (y->value.decimal.sign) + return (1); + if (x->value.decimal.frac == y->value.decimal.frac) { + if (x->value.decimal.base < y->value.decimal.base) + return (-1); + return (x->value.decimal.base > y->value.decimal.base); + } + if (y->value.decimal.frac > x->value.decimal.frac) { + swp = y; + y = x; + x = swp; + order = -order; + } + tmp = + x->value.decimal.base / powten[x->value.decimal.frac - + y->value.decimal.frac]; + if (tmp > y->value.decimal.base) + return (order); + if (tmp < y->value.decimal.base) + return (-order); + tmp = + y->value.decimal.base * powten[x->value.decimal.frac - + y->value.decimal.frac]; + if (x->value.decimal.base < tmp) + return (-order); + if (x->value.decimal.base == tmp) + return (0); + return (order); +} + +/** + * xmlSchemaCompareDurations: + * @x: a first duration value + * @y: a second duration value + * + * Compare 2 durations + * + * Returns -1 if x < y, 0 if x == y, 1 if x > y, 2 if x <> y, and -2 in + * case of error + */ +static int +xmlSchemaCompareDurations(xmlSchemaValPtr x, xmlSchemaValPtr y) +{ + long carry, mon, day; + double sec; + long xmon, xday, myear, lyear, minday, maxday; + static const long dayRange [2][12] = { + { 0, 28, 59, 89, 120, 150, 181, 212, 242, 273, 303, 334, }, + { 0, 31, 62, 92, 123, 153, 184, 215, 245, 276, 306, 337} }; + + if ((x == NULL) || (y == NULL)) + return -2; + + /* months */ + mon = x->value.dur.mon - y->value.dur.mon; + + /* seconds */ + sec = x->value.dur.sec - y->value.dur.sec; + carry = (long)sec / SECS_PER_DAY; + sec -= (double)(carry * SECS_PER_DAY); + + /* days */ + day = x->value.dur.day - y->value.dur.day + carry; + + /* easy test */ + if (mon == 0) { + if (day == 0) + if (sec == 0.0) + return 0; + else if (sec < 0.0) + return -1; + else + return 1; + else if (day < 0) 
+ return -1; + else + return 1; + } + + if (mon > 0) { + if ((day >= 0) && (sec >= 0.0)) + return 1; + else { + xmon = mon; + xday = -day; + } + } else if ((day <= 0) && (sec <= 0.0)) { + return -1; + } else { + xmon = -mon; + xday = day; + } + + myear = xmon / 12; + lyear = myear / 4; + minday = (myear * 365) + (lyear != 0 ? lyear - 1 : 0); + maxday = (myear * 365) + (lyear != 0 ? lyear + 1 : 0); + + xmon = xmon % 12; + minday += dayRange[0][xmon]; + maxday += dayRange[1][xmon]; + + if (maxday < xday) + return 1; + else if (minday > xday) + return -1; + + /* indeterminate */ + return 2; +} + +/* + * macros for adding date/times and durations + */ +#define FQUOTIENT(a,b) (floor(((double)a/(double)b))) +#define MODULO(a,b) (a - FQUOTIENT(a,b) * b) +#define FQUOTIENT_RANGE(a,low,high) (FQUOTIENT((a-low),(high-low))) +#define MODULO_RANGE(a,low,high) ((MODULO((a-low),(high-low)))+low) + +/** + * _xmlSchemaDateAdd: + * @dt: an #xmlSchemaValPtr + * @dur: an #xmlSchemaValPtr of type #XS_DURATION + * + * Compute a new date/time from @dt and @dur. This function assumes @dt + * is either #XML_SCHEMAS_DATETIME, #XML_SCHEMAS_DATE, #XML_SCHEMAS_GYEARMONTH, + * or #XML_SCHEMAS_GYEAR. + * + * Returns date/time pointer or NULL. 
+ */ +static xmlSchemaValPtr +_xmlSchemaDateAdd (xmlSchemaValPtr dt, xmlSchemaValPtr dur) +{ + xmlSchemaValPtr ret; + long carry, tempdays, temp; + xmlSchemaValDatePtr r, d; + xmlSchemaValDurationPtr u; + + if ((dt == NULL) || (dur == NULL)) + return NULL; + + ret = xmlSchemaNewValue(dt->type); + if (ret == NULL) + return NULL; + + r = &(ret->value.date); + d = &(dt->value.date); + u = &(dur->value.dur); + + /* normalization */ + if (d->mon == 0) + d->mon = 1; + + /* normalize for time zone offset */ + u->sec -= (d->tzo * 60); + d->tzo = 0; + + /* normalization */ + if (d->day == 0) + d->day = 1; + + /* month */ + carry = d->mon + u->mon; + r->mon = MODULO_RANGE(carry, 1, 13); + carry = FQUOTIENT_RANGE(carry, 1, 13); + + /* year (may be modified later) */ + r->year = d->year + carry; + if (r->year == 0) { + if (d->year > 0) + r->year--; + else + r->year++; + } + + /* time zone */ + r->tzo = d->tzo; + r->tz_flag = d->tz_flag; + + /* seconds */ + r->sec = d->sec + u->sec; + carry = FQUOTIENT((long)r->sec, 60); + if (r->sec != 0.0) { + r->sec = MODULO(r->sec, 60.0); + } + + /* minute */ + carry += d->min; + r->min = MODULO(carry, 60); + carry = FQUOTIENT(carry, 60); + + /* hours */ + carry += d->hour; + r->hour = MODULO(carry, 24); + carry = FQUOTIENT(carry, 24); + + /* + * days + * Note we use tempdays because the temporary values may need more + * than 5 bits + */ + if ((VALID_YEAR(r->year)) && (VALID_MONTH(r->mon)) && + (d->day > MAX_DAYINMONTH(r->year, r->mon))) + tempdays = MAX_DAYINMONTH(r->year, r->mon); + else if (d->day < 1) + tempdays = 1; + else + tempdays = d->day; + + tempdays += u->day + carry; + + while (1) { + if (tempdays < 1) { + long tmon = MODULO_RANGE(r->mon-1, 1, 13); + long tyr = r->year + FQUOTIENT_RANGE(r->mon-1, 1, 13); + if (tyr == 0) + tyr--; + tempdays += MAX_DAYINMONTH(tyr, tmon); + carry = -1; + } else if (tempdays > MAX_DAYINMONTH(r->year, r->mon)) { + tempdays = tempdays - MAX_DAYINMONTH(r->year, r->mon); + carry = 1; + } else + break; 
+ + temp = r->mon + carry; + r->mon = MODULO_RANGE(temp, 1, 13); + r->year = r->year + FQUOTIENT_RANGE(temp, 1, 13); + if (r->year == 0) { + if (temp < 1) + r->year--; + else + r->year++; + } + } + + r->day = tempdays; + + /* + * adjust the date/time type to the date values + */ + if (ret->type != XML_SCHEMAS_DATETIME) { + if ((r->hour) || (r->min) || (r->sec)) + ret->type = XML_SCHEMAS_DATETIME; + else if (ret->type != XML_SCHEMAS_DATE) { + if ((r->mon != 1) && (r->day != 1)) + ret->type = XML_SCHEMAS_DATE; + else if ((ret->type != XML_SCHEMAS_GYEARMONTH) && (r->mon != 1)) + ret->type = XML_SCHEMAS_GYEARMONTH; + } + } + + return ret; +} + +/** + * xmlSchemaDupVal: + * @v: value to duplicate + * + * returns a duplicated value. + */ +static xmlSchemaValPtr +xmlSchemaDupVal (xmlSchemaValPtr v) +{ + xmlSchemaValPtr ret = xmlSchemaNewValue(v->type); + if (ret == NULL) + return ret; + + memcpy(ret, v, sizeof(xmlSchemaVal)); + return ret; +} + +/** + * xmlSchemaDateNormalize: + * @dt: an #xmlSchemaValPtr + * + * Normalize @dt to GMT time. + * + */ +static xmlSchemaValPtr +xmlSchemaDateNormalize (xmlSchemaValPtr dt, double offset) +{ + xmlSchemaValPtr dur, ret; + + if (dt == NULL) + return NULL; + + if (((dt->type != XML_SCHEMAS_TIME) && + (dt->type != XML_SCHEMAS_DATETIME)) || (dt->value.date.tzo == 0)) + return xmlSchemaDupVal(dt); + + dur = xmlSchemaNewValue(XML_SCHEMAS_DURATION); + if (dur == NULL) + return NULL; + + dur->value.date.sec -= offset; + + ret = _xmlSchemaDateAdd(dt, dur); + if (ret == NULL) + return NULL; + + xmlSchemaFreeValue(dur); + + /* ret->value.date.tzo = 0; */ + return ret; +} + +/** + * _xmlSchemaDateCastYMToDays: + * @dt: an #xmlSchemaValPtr + * + * Convert mon and year of @dt to total number of days. Take the + * number of years since (or before) 1 AD and add the number of leap + * years. This is a function because negative + * years must be handled a little differently and there is no zero year. + * + * Returns number of days. 
+ */ +static long +_xmlSchemaDateCastYMToDays (const xmlSchemaValPtr dt) +{ + long ret; + + if (dt->value.date.year < 0) + ret = (dt->value.date.year * 365) + + (((dt->value.date.year+1)/4)-((dt->value.date.year+1)/100)+ + ((dt->value.date.year+1)/400)) + + DAY_IN_YEAR(0, dt->value.date.mon, dt->value.date.year); + else + ret = ((dt->value.date.year-1) * 365) + + (((dt->value.date.year-1)/4)-((dt->value.date.year-1)/100)+ + ((dt->value.date.year-1)/400)) + + DAY_IN_YEAR(0, dt->value.date.mon, dt->value.date.year); + + return ret; +} + +/** + * TIME_TO_NUMBER: + * @dt: an #xmlSchemaValPtr + * + * Calculates the number of seconds in the time portion of @dt. + * + * Returns seconds. + */ +#define TIME_TO_NUMBER(dt) \ + ((double)((dt->value.date.hour * SECS_PER_HOUR) + \ + (dt->value.date.min * SECS_PER_MIN)) + dt->value.date.sec) + +/** + * xmlSchemaCompareDates: + * @x: a first date/time value + * @y: a second date/time value + * + * Compare 2 date/times + * + * Returns -1 if x < y, 0 if x == y, 1 if x > y, 2 if x <> y, and -2 in + * case of error + */ +static int +xmlSchemaCompareDates (xmlSchemaValPtr x, xmlSchemaValPtr y) +{ + unsigned char xmask, ymask, xor_mask, and_mask; + xmlSchemaValPtr p1, p2, q1, q2; + long p1d, p2d, q1d, q2d; + + if ((x == NULL) || (y == NULL)) + return -2; + + if (x->value.date.tz_flag) { + + if (!y->value.date.tz_flag) { + p1 = xmlSchemaDateNormalize(x, 0); + p1d = _xmlSchemaDateCastYMToDays(p1) + p1->value.date.day; + /* normalize y + 14:00 */ + q1 = xmlSchemaDateNormalize(y, (14 * SECS_PER_HOUR)); + + q1d = _xmlSchemaDateCastYMToDays(q1) + q1->value.date.day; + if (p1d < q1d) { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + return -1; + } else if (p1d == q1d) { + double sec; + + sec = TIME_TO_NUMBER(p1) - TIME_TO_NUMBER(q1); + if (sec < 0.0) { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + return -1; + } else { + /* normalize y - 14:00 */ + q2 = xmlSchemaDateNormalize(y, -(14 * SECS_PER_HOUR)); + q2d = 
_xmlSchemaDateCastYMToDays(q2) + q2->value.date.day; + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + xmlSchemaFreeValue(q2); + if (p1d > q2d) + return 1; + else if (p1d == q2d) { + sec = TIME_TO_NUMBER(p1) - TIME_TO_NUMBER(q2); + if (sec > 0.0) + return 1; + else + return 2; /* indeterminate */ + } + } + } else { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + } + } + } else if (y->value.date.tz_flag) { + q1 = xmlSchemaDateNormalize(y, 0); + q1d = _xmlSchemaDateCastYMToDays(q1) + q1->value.date.day; + + /* normalize x - 14:00 */ + p1 = xmlSchemaDateNormalize(x, -(14 * SECS_PER_HOUR)); + p1d = _xmlSchemaDateCastYMToDays(p1) + p1->value.date.day; + + if (p1d < q1d) { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + return -1; + } else if (p1d == q1d) { + double sec; + + sec = TIME_TO_NUMBER(p1) - TIME_TO_NUMBER(q1); + if (sec < 0.0) { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + return -1; + } else { + /* normalize x + 14:00 */ + p2 = xmlSchemaDateNormalize(x, (14 * SECS_PER_HOUR)); + p2d = _xmlSchemaDateCastYMToDays(p2) + p2->value.date.day; + + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + xmlSchemaFreeValue(p2); + if (p2d > q1d) + return 1; + else if (p2d == q1d) { + sec = TIME_TO_NUMBER(p2) - TIME_TO_NUMBER(q1); + if (sec > 0.0) + return 1; + else + return 2; /* indeterminate */ + } + } + } else { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + } + } + + /* + * if the same type then calculate the difference + */ + if (x->type == y->type) { + q1 = xmlSchemaDateNormalize(y, 0); + q1d = _xmlSchemaDateCastYMToDays(q1) + q1->value.date.day; + + p1 = xmlSchemaDateNormalize(x, 0); + p1d = _xmlSchemaDateCastYMToDays(p1) + p1->value.date.day; + + if (p1d < q1d) { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + return -1; + } else if (p1d > q1d) { + xmlSchemaFreeValue(p1); + xmlSchemaFreeValue(q1); + return 1; + } else { + double sec; + + sec = TIME_TO_NUMBER(p1) - TIME_TO_NUMBER(q1); + xmlSchemaFreeValue(p1); + 
xmlSchemaFreeValue(q1); + if (sec < 0.0) + return -1; + else if (sec > 0.0) + return 1; + + } + return 0; + } + + switch (x->type) { + case XML_SCHEMAS_DATETIME: + xmask = 0xf; + break; + case XML_SCHEMAS_DATE: + xmask = 0x7; + break; + case XML_SCHEMAS_GYEAR: + xmask = 0x1; + break; + case XML_SCHEMAS_GMONTH: + xmask = 0x2; + break; + case XML_SCHEMAS_GDAY: + xmask = 0x3; + break; + case XML_SCHEMAS_GYEARMONTH: + xmask = 0x3; + break; + case XML_SCHEMAS_GMONTHDAY: + xmask = 0x6; + break; + case XML_SCHEMAS_TIME: + xmask = 0x8; + break; + default: + xmask = 0; + break; + } + + switch (y->type) { + case XML_SCHEMAS_DATETIME: + ymask = 0xf; + break; + case XML_SCHEMAS_DATE: + ymask = 0x7; + break; + case XML_SCHEMAS_GYEAR: + ymask = 0x1; + break; + case XML_SCHEMAS_GMONTH: + ymask = 0x2; + break; + case XML_SCHEMAS_GDAY: + ymask = 0x3; + break; + case XML_SCHEMAS_GYEARMONTH: + ymask = 0x3; + break; + case XML_SCHEMAS_GMONTHDAY: + ymask = 0x6; + break; + case XML_SCHEMAS_TIME: + ymask = 0x8; + break; + default: + ymask = 0; + break; + } + + xor_mask = xmask ^ ymask; /* mark type differences */ + and_mask = xmask & ymask; /* mark field specification */ + + /* year */ + if (xor_mask & 1) + return 2; /* indeterminate */ + else if (and_mask & 1) { + if (x->value.date.year < y->value.date.year) + return -1; + else if (x->value.date.year > y->value.date.year) + return 1; + } + + /* month */ + if (xor_mask & 2) + return 2; /* indeterminate */ + else if (and_mask & 2) { + if (x->value.date.mon < y->value.date.mon) + return -1; + else if (x->value.date.mon > y->value.date.mon) + return 1; + } + + /* day */ + if (xor_mask & 4) + return 2; /* indeterminate */ + else if (and_mask & 4) { + if (x->value.date.day < y->value.date.day) + return -1; + else if (x->value.date.day > y->value.date.day) + return 1; + } + + /* time */ + if (xor_mask & 8) + return 2; /* indeterminate */ + else if (and_mask & 8) { + if (x->value.date.hour < y->value.date.hour) + return -1; + else if 
(x->value.date.hour > y->value.date.hour) + return 1; + else if (x->value.date.min < y->value.date.min) + return -1; + else if (x->value.date.min > y->value.date.min) + return 1; + else if (x->value.date.sec < y->value.date.sec) + return -1; + else if (x->value.date.sec > y->value.date.sec) + return 1; + } + + return 0; +} + +/** + * xmlSchemaCompareValues: + * @x: a first value + * @y: a second value + * + * Compare 2 values + * + * Returns -1 if x < y, 0 if x == y, 1 if x > y, 2 if x <> y, and -2 in + * case of error + */ +static int +xmlSchemaCompareValues(xmlSchemaValPtr x, xmlSchemaValPtr y) { + if ((x == NULL) || (y == NULL)) + return(-2); + + switch (x->type) { + case XML_SCHEMAS_STRING: + TODO + case XML_SCHEMAS_DECIMAL: + if (y->type == XML_SCHEMAS_DECIMAL) + return(xmlSchemaCompareDecimals(x, y)); + return(-2); + case XML_SCHEMAS_DURATION: + if (y->type == XML_SCHEMAS_DURATION) + return(xmlSchemaCompareDurations(x, y)); + return(-2); + case XML_SCHEMAS_TIME: + case XML_SCHEMAS_GDAY: + case XML_SCHEMAS_GMONTH: + case XML_SCHEMAS_GMONTHDAY: + case XML_SCHEMAS_GYEAR: + case XML_SCHEMAS_GYEARMONTH: + case XML_SCHEMAS_DATE: + case XML_SCHEMAS_DATETIME: + if ((y->type == XML_SCHEMAS_DATETIME) || + (y->type == XML_SCHEMAS_TIME) || + (y->type == XML_SCHEMAS_GDAY) || + (y->type == XML_SCHEMAS_GMONTH) || + (y->type == XML_SCHEMAS_GMONTHDAY) || + (y->type == XML_SCHEMAS_GYEAR) || + (y->type == XML_SCHEMAS_DATE) || + (y->type == XML_SCHEMAS_GYEARMONTH)) + return (xmlSchemaCompareDates(x, y)); + + return (-2); + default: + TODO + } + return -2; +} + +/** + * xmlSchemaValidateFacet: + * @base: the base type + * @facet: the facet to check + * @value: the lexical repr of the value to validate + * @val: the precomputed value + * + * Check a value against a facet condition + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +int +xmlSchemaValidateFacet(xmlSchemaTypePtr base ATTRIBUTE_UNUSED, + xmlSchemaFacetPtr facet, + const xmlChar *value, xmlSchemaValPtr val) +{ + int ret; + + switch (facet->type) { + case XML_SCHEMA_FACET_PATTERN: + ret = xmlRegexpExec(facet->regexp, value); + if (ret == 1) + return(0); + if (ret == 0) { + TODO /* error code */ + return(1); + } + return(ret); + case XML_SCHEMA_FACET_MAXEXCLUSIVE: + ret = xmlSchemaCompareValues(val, facet->val); + if (ret == -2) { + TODO /* error code */ + return(-1); + } + if (ret == -1) + return(0); + /* error code */ + return(1); + case XML_SCHEMA_FACET_MAXINCLUSIVE: + ret = xmlSchemaCompareValues(val, facet->val); + if (ret == -2) { + TODO /* error code */ + return(-1); + } + if ((ret == -1) || (ret == 0)) + return(0); + /* error code */ + return(1); + case XML_SCHEMA_FACET_MINEXCLUSIVE: + ret = xmlSchemaCompareValues(val, facet->val); + if (ret == -2) { + TODO /* error code */ + return(-1); + } + if (ret == 1) + return(0); + /* error code */ + return(1); + case XML_SCHEMA_FACET_MININCLUSIVE: + ret = xmlSchemaCompareValues(val, facet->val); + if (ret == -2) { + TODO /* error code */ + return(-1); + } + if ((ret == 1) || (ret == 0)) + return(0); + /* error code */ + return(1); + case XML_SCHEMA_FACET_WHITESPACE: + TODO /* whitespaces */ + return(0); + case XML_SCHEMA_FACET_MAXLENGTH: + if ((facet->val != NULL) && + (facet->val->type == XML_SCHEMAS_DECIMAL) && + (facet->val->value.decimal.frac == 0)) { + unsigned int len; + + if (facet->val->value.decimal.sign == 1) + return(1); + len = xmlUTF8Strlen(value); + if (len > facet->val->value.decimal.base) + return(1); + return(0); + } + TODO /* error code */ + return(1); + case XML_SCHEMA_FACET_ENUMERATION: + if ((facet->value != NULL) && + (xmlStrEqual(facet->value, value))) + return(0); + return(1); + default: + TODO + } + return(0); +} + +#endif /* LIBXML_SCHEMAS_ENABLED */ diff --git a/bundle/libxml/xmlunicode.c b/bundle/libxml/xmlunicode.c new file mode 100644 index 
0000000000..8baf8d18c7 --- /dev/null +++ b/bundle/libxml/xmlunicode.c @@ -0,0 +1,4290 @@ +/* + * xmlunicode.c: this module implements the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html + * using the genUnicode.py Python script. + * + * Generation date: Tue Apr 16 17:28:05 2002 + * Sources: Blocks-4.txt UnicodeData-3.1.0.txt + * Daniel Veillard <veillard@redhat.com> + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_UNICODE_ENABLED + +#include <string.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlunicode.h> + +/** + * xmlUCSIsAlphabeticPresentationForms: + * @code: UCS code point + * + * Check whether the character is part of AlphabeticPresentationForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsAlphabeticPresentationForms(int code) { + return((code >= 0xFB00) && (code <= 0xFB4F)); +} + +/** + * xmlUCSIsArabic: + * @code: UCS code point + * + * Check whether the character is part of Arabic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabic(int code) { + return((code >= 0x0600) && (code <= 0x06FF)); +} + +/** + * xmlUCSIsArabicPresentationFormsA: + * @code: UCS code point + * + * Check whether the character is part of ArabicPresentationForms-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabicPresentationFormsA(int code) { + return((code >= 0xFB50) && (code <= 0xFDFF)); +} + +/** + * xmlUCSIsArabicPresentationFormsB: + * @code: UCS code point + * + * Check whether the character is part of ArabicPresentationForms-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabicPresentationFormsB(int code) { + return((code >= 0xFE70) && (code <= 0xFEFE)); +} + +/** + * xmlUCSIsArmenian: + * @code: UCS code point + * + * Check whether the character is part of Armenian UCS Block + * + * Returns 1 if 
true 0 otherwise + */ +int +xmlUCSIsArmenian(int code) { + return((code >= 0x0530) && (code <= 0x058F)); +} + +/** + * xmlUCSIsArrows: + * @code: UCS code point + * + * Check whether the character is part of Arrows UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArrows(int code) { + return((code >= 0x2190) && (code <= 0x21FF)); +} + +/** + * xmlUCSIsBasicLatin: + * @code: UCS code point + * + * Check whether the character is part of BasicLatin UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBasicLatin(int code) { + return((code >= 0x0000) && (code <= 0x007F)); +} + +/** + * xmlUCSIsBengali: + * @code: UCS code point + * + * Check whether the character is part of Bengali UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBengali(int code) { + return((code >= 0x0980) && (code <= 0x09FF)); +} + +/** + * xmlUCSIsBlockElements: + * @code: UCS code point + * + * Check whether the character is part of BlockElements UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBlockElements(int code) { + return((code >= 0x2580) && (code <= 0x259F)); +} + +/** + * xmlUCSIsBopomofo: + * @code: UCS code point + * + * Check whether the character is part of Bopomofo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBopomofo(int code) { + return((code >= 0x3100) && (code <= 0x312F)); +} + +/** + * xmlUCSIsBopomofoExtended: + * @code: UCS code point + * + * Check whether the character is part of BopomofoExtended UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBopomofoExtended(int code) { + return((code >= 0x31A0) && (code <= 0x31BF)); +} + +/** + * xmlUCSIsBoxDrawing: + * @code: UCS code point + * + * Check whether the character is part of BoxDrawing UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBoxDrawing(int code) { + return((code >= 0x2500) && (code <= 0x257F)); +} + +/** + * xmlUCSIsBraillePatterns: + * @code: UCS code point + * + * Check whether the character 
is part of BraillePatterns UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBraillePatterns(int code) { + return((code >= 0x2800) && (code <= 0x28FF)); +} + +/** + * xmlUCSIsByzantineMusicalSymbols: + * @code: UCS code point + * + * Check whether the character is part of ByzantineMusicalSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsByzantineMusicalSymbols(int code) { + return((code >= 0x1D000) && (code <= 0x1D0FF)); +} + +/** + * xmlUCSIsCJKCompatibility: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibility UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibility(int code) { + return((code >= 0x3300) && (code <= 0x33FF)); +} + +/** + * xmlUCSIsCJKCompatibilityForms: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibilityForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibilityForms(int code) { + return((code >= 0xFE30) && (code <= 0xFE4F)); +} + +/** + * xmlUCSIsCJKCompatibilityIdeographs: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibilityIdeographs UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibilityIdeographs(int code) { + return((code >= 0xF900) && (code <= 0xFAFF)); +} + +/** + * xmlUCSIsCJKCompatibilityIdeographsSupplement: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibilityIdeographsSupplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) { + return((code >= 0x2F800) && (code <= 0x2FA1F)); +} + +/** + * xmlUCSIsCJKRadicalsSupplement: + * @code: UCS code point + * + * Check whether the character is part of CJKRadicalsSupplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKRadicalsSupplement(int code) { + return((code >= 0x2E80) && (code <= 0x2EFF)); +} + +/** + * 
xmlUCSIsCJKSymbolsandPunctuation: + * @code: UCS code point + * + * Check whether the character is part of CJKSymbolsandPunctuation UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKSymbolsandPunctuation(int code) { + return((code >= 0x3000) && (code <= 0x303F)); +} + +/** + * xmlUCSIsCJKUnifiedIdeographs: + * @code: UCS code point + * + * Check whether the character is part of CJKUnifiedIdeographs UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKUnifiedIdeographs(int code) { + return((code >= 0x4E00) && (code <= 0x9FFF)); +} + +/** + * xmlUCSIsCJKUnifiedIdeographsExtensionA: + * @code: UCS code point + * + * Check whether the character is part of CJKUnifiedIdeographsExtensionA UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) { + return((code >= 0x3400) && (code <= 0x4DB5)); +} + +/** + * xmlUCSIsCJKUnifiedIdeographsExtensionB: + * @code: UCS code point + * + * Check whether the character is part of CJKUnifiedIdeographsExtensionB UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) { + return((code >= 0x20000) && (code <= 0x2A6D6)); +} + +/** + * xmlUCSIsCherokee: + * @code: UCS code point + * + * Check whether the character is part of Cherokee UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCherokee(int code) { + return((code >= 0x13A0) && (code <= 0x13FF)); +} + +/** + * xmlUCSIsCombiningDiacriticalMarks: + * @code: UCS code point + * + * Check whether the character is part of CombiningDiacriticalMarks UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningDiacriticalMarks(int code) { + return((code >= 0x0300) && (code <= 0x036F)); +} + +/** + * xmlUCSIsCombiningHalfMarks: + * @code: UCS code point + * + * Check whether the character is part of CombiningHalfMarks UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningHalfMarks(int code) { + 
return((code >= 0xFE20) && (code <= 0xFE2F)); +} + +/** + * xmlUCSIsCombiningMarksforSymbols: + * @code: UCS code point + * + * Check whether the character is part of CombiningMarksforSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningMarksforSymbols(int code) { + return((code >= 0x20D0) && (code <= 0x20FF)); +} + +/** + * xmlUCSIsControlPictures: + * @code: UCS code point + * + * Check whether the character is part of ControlPictures UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsControlPictures(int code) { + return((code >= 0x2400) && (code <= 0x243F)); +} + +/** + * xmlUCSIsCurrencySymbols: + * @code: UCS code point + * + * Check whether the character is part of CurrencySymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCurrencySymbols(int code) { + return((code >= 0x20A0) && (code <= 0x20CF)); +} + +/** + * xmlUCSIsCyrillic: + * @code: UCS code point + * + * Check whether the character is part of Cyrillic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCyrillic(int code) { + return((code >= 0x0400) && (code <= 0x04FF)); +} + +/** + * xmlUCSIsDeseret: + * @code: UCS code point + * + * Check whether the character is part of Deseret UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsDeseret(int code) { + return((code >= 0x10400) && (code <= 0x1044F)); +} + +/** + * xmlUCSIsDevanagari: + * @code: UCS code point + * + * Check whether the character is part of Devanagari UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsDevanagari(int code) { + return((code >= 0x0900) && (code <= 0x097F)); +} + +/** + * xmlUCSIsDingbats: + * @code: UCS code point + * + * Check whether the character is part of Dingbats UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsDingbats(int code) { + return((code >= 0x2700) && (code <= 0x27BF)); +} + +/** + * xmlUCSIsEnclosedAlphanumerics: + * @code: UCS code point + * + * Check whether the character 
is part of EnclosedAlphanumerics UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsEnclosedAlphanumerics(int code) { + return((code >= 0x2460) && (code <= 0x24FF)); +} + +/** + * xmlUCSIsEnclosedCJKLettersandMonths: + * @code: UCS code point + * + * Check whether the character is part of EnclosedCJKLettersandMonths UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsEnclosedCJKLettersandMonths(int code) { + return((code >= 0x3200) && (code <= 0x32FF)); +} + +/** + * xmlUCSIsEthiopic: + * @code: UCS code point + * + * Check whether the character is part of Ethiopic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsEthiopic(int code) { + return((code >= 0x1200) && (code <= 0x137F)); +} + +/** + * xmlUCSIsGeneralPunctuation: + * @code: UCS code point + * + * Check whether the character is part of GeneralPunctuation UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGeneralPunctuation(int code) { + return((code >= 0x2000) && (code <= 0x206F)); +} + +/** + * xmlUCSIsGeometricShapes: + * @code: UCS code point + * + * Check whether the character is part of GeometricShapes UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGeometricShapes(int code) { + return((code >= 0x25A0) && (code <= 0x25FF)); +} + +/** + * xmlUCSIsGeorgian: + * @code: UCS code point + * + * Check whether the character is part of Georgian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGeorgian(int code) { + return((code >= 0x10A0) && (code <= 0x10FF)); +} + +/** + * xmlUCSIsGothic: + * @code: UCS code point + * + * Check whether the character is part of Gothic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGothic(int code) { + return((code >= 0x10330) && (code <= 0x1034F)); +} + +/** + * xmlUCSIsGreek: + * @code: UCS code point + * + * Check whether the character is part of Greek UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGreek(int code) { + return((code >= 
0x0370) && (code <= 0x03FF)); +} + +/** + * xmlUCSIsGreekExtended: + * @code: UCS code point + * + * Check whether the character is part of GreekExtended UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGreekExtended(int code) { + return((code >= 0x1F00) && (code <= 0x1FFF)); +} + +/** + * xmlUCSIsGujarati: + * @code: UCS code point + * + * Check whether the character is part of Gujarati UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGujarati(int code) { + return((code >= 0x0A80) && (code <= 0x0AFF)); +} + +/** + * xmlUCSIsGurmukhi: + * @code: UCS code point + * + * Check whether the character is part of Gurmukhi UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGurmukhi(int code) { + return((code >= 0x0A00) && (code <= 0x0A7F)); +} + +/** + * xmlUCSIsHalfwidthandFullwidthForms: + * @code: UCS code point + * + * Check whether the character is part of HalfwidthandFullwidthForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHalfwidthandFullwidthForms(int code) { + return((code >= 0xFF00) && (code <= 0xFFEF)); +} + +/** + * xmlUCSIsHangulCompatibilityJamo: + * @code: UCS code point + * + * Check whether the character is part of HangulCompatibilityJamo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHangulCompatibilityJamo(int code) { + return((code >= 0x3130) && (code <= 0x318F)); +} + +/** + * xmlUCSIsHangulJamo: + * @code: UCS code point + * + * Check whether the character is part of HangulJamo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHangulJamo(int code) { + return((code >= 0x1100) && (code <= 0x11FF)); +} + +/** + * xmlUCSIsHangulSyllables: + * @code: UCS code point + * + * Check whether the character is part of HangulSyllables UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHangulSyllables(int code) { + return((code >= 0xAC00) && (code <= 0xD7A3)); +} + +/** + * xmlUCSIsHebrew: + * @code: UCS code point + * + * Check whether 
the character is part of Hebrew UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHebrew(int code) { + return((code >= 0x0590) && (code <= 0x05FF)); +} + +/** + * xmlUCSIsHighPrivateUseSurrogates: + * @code: UCS code point + * + * Check whether the character is part of HighPrivateUseSurrogates UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHighPrivateUseSurrogates(int code) { + return((code >= 0xDB80) && (code <= 0xDBFF)); +} + +/** + * xmlUCSIsHighSurrogates: + * @code: UCS code point + * + * Check whether the character is part of HighSurrogates UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHighSurrogates(int code) { + return((code >= 0xD800) && (code <= 0xDB7F)); +} + +/** + * xmlUCSIsHiragana: + * @code: UCS code point + * + * Check whether the character is part of Hiragana UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHiragana(int code) { + return((code >= 0x3040) && (code <= 0x309F)); +} + +/** + * xmlUCSIsIPAExtensions: + * @code: UCS code point + * + * Check whether the character is part of IPAExtensions UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsIPAExtensions(int code) { + return((code >= 0x0250) && (code <= 0x02AF)); +} + +/** + * xmlUCSIsIdeographicDescriptionCharacters: + * @code: UCS code point + * + * Check whether the character is part of IdeographicDescriptionCharacters UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsIdeographicDescriptionCharacters(int code) { + return((code >= 0x2FF0) && (code <= 0x2FFF)); +} + +/** + * xmlUCSIsKanbun: + * @code: UCS code point + * + * Check whether the character is part of Kanbun UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKanbun(int code) { + return((code >= 0x3190) && (code <= 0x319F)); +} + +/** + * xmlUCSIsKangxiRadicals: + * @code: UCS code point + * + * Check whether the character is part of KangxiRadicals UCS Block + * + * Returns 1 if true 0 otherwise + */ +int 
+xmlUCSIsKangxiRadicals(int code) { + return((code >= 0x2F00) && (code <= 0x2FDF)); +} + +/** + * xmlUCSIsKannada: + * @code: UCS code point + * + * Check whether the character is part of Kannada UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKannada(int code) { + return((code >= 0x0C80) && (code <= 0x0CFF)); +} + +/** + * xmlUCSIsKatakana: + * @code: UCS code point + * + * Check whether the character is part of Katakana UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKatakana(int code) { + return((code >= 0x30A0) && (code <= 0x30FF)); +} + +/** + * xmlUCSIsKhmer: + * @code: UCS code point + * + * Check whether the character is part of Khmer UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKhmer(int code) { + return((code >= 0x1780) && (code <= 0x17FF)); +} + +/** + * xmlUCSIsLao: + * @code: UCS code point + * + * Check whether the character is part of Lao UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLao(int code) { + return((code >= 0x0E80) && (code <= 0x0EFF)); +} + +/** + * xmlUCSIsLatin1Supplement: + * @code: UCS code point + * + * Check whether the character is part of Latin-1Supplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatin1Supplement(int code) { + return((code >= 0x0080) && (code <= 0x00FF)); +} + +/** + * xmlUCSIsLatinExtendedA: + * @code: UCS code point + * + * Check whether the character is part of LatinExtended-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatinExtendedA(int code) { + return((code >= 0x0100) && (code <= 0x017F)); +} + +/** + * xmlUCSIsLatinExtendedB: + * @code: UCS code point + * + * Check whether the character is part of LatinExtended-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatinExtendedB(int code) { + return((code >= 0x0180) && (code <= 0x024F)); +} + +/** + * xmlUCSIsLatinExtendedAdditional: + * @code: UCS code point + * + * Check whether the character is part of 
LatinExtendedAdditional UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatinExtendedAdditional(int code) { + return((code >= 0x1E00) && (code <= 0x1EFF)); +} + +/** + * xmlUCSIsLetterlikeSymbols: + * @code: UCS code point + * + * Check whether the character is part of LetterlikeSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLetterlikeSymbols(int code) { + return((code >= 0x2100) && (code <= 0x214F)); +} + +/** + * xmlUCSIsLowSurrogates: + * @code: UCS code point + * + * Check whether the character is part of LowSurrogates UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLowSurrogates(int code) { + return((code >= 0xDC00) && (code <= 0xDFFF)); +} + +/** + * xmlUCSIsMalayalam: + * @code: UCS code point + * + * Check whether the character is part of Malayalam UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMalayalam(int code) { + return((code >= 0x0D00) && (code <= 0x0D7F)); +} + +/** + * xmlUCSIsMathematicalAlphanumericSymbols: + * @code: UCS code point + * + * Check whether the character is part of MathematicalAlphanumericSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMathematicalAlphanumericSymbols(int code) { + return((code >= 0x1D400) && (code <= 0x1D7FF)); +} + +/** + * xmlUCSIsMathematicalOperators: + * @code: UCS code point + * + * Check whether the character is part of MathematicalOperators UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMathematicalOperators(int code) { + return((code >= 0x2200) && (code <= 0x22FF)); +} + +/** + * xmlUCSIsMiscellaneousSymbols: + * @code: UCS code point + * + * Check whether the character is part of MiscellaneousSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousSymbols(int code) { + return((code >= 0x2600) && (code <= 0x26FF)); +} + +/** + * xmlUCSIsMiscellaneousTechnical: + * @code: UCS code point + * + * Check whether the character is part of 
MiscellaneousTechnical UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousTechnical(int code) { + return((code >= 0x2300) && (code <= 0x23FF)); +} + +/** + * xmlUCSIsMongolian: + * @code: UCS code point + * + * Check whether the character is part of Mongolian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMongolian(int code) { + return((code >= 0x1800) && (code <= 0x18AF)); +} + +/** + * xmlUCSIsMusicalSymbols: + * @code: UCS code point + * + * Check whether the character is part of MusicalSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMusicalSymbols(int code) { + return((code >= 0x1D100) && (code <= 0x1D1FF)); +} + +/** + * xmlUCSIsMyanmar: + * @code: UCS code point + * + * Check whether the character is part of Myanmar UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMyanmar(int code) { + return((code >= 0x1000) && (code <= 0x109F)); +} + +/** + * xmlUCSIsNumberForms: + * @code: UCS code point + * + * Check whether the character is part of NumberForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsNumberForms(int code) { + return((code >= 0x2150) && (code <= 0x218F)); +} + +/** + * xmlUCSIsOgham: + * @code: UCS code point + * + * Check whether the character is part of Ogham UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOgham(int code) { + return((code >= 0x1680) && (code <= 0x169F)); +} + +/** + * xmlUCSIsOldItalic: + * @code: UCS code point + * + * Check whether the character is part of OldItalic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOldItalic(int code) { + return((code >= 0x10300) && (code <= 0x1032F)); +} + +/** + * xmlUCSIsOpticalCharacterRecognition: + * @code: UCS code point + * + * Check whether the character is part of OpticalCharacterRecognition UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOpticalCharacterRecognition(int code) { + return((code >= 0x2440) && (code <= 
0x245F)); +} + +/** + * xmlUCSIsOriya: + * @code: UCS code point + * + * Check whether the character is part of Oriya UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOriya(int code) { + return((code >= 0x0B00) && (code <= 0x0B7F)); +} + +/** + * xmlUCSIsPrivateUse: + * @code: UCS code point + * + * Check whether the character is part of PrivateUse UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsPrivateUse(int code) { + return((code >= 0x100000) && (code <= 0x10FFFD)); +} + +/** + * xmlUCSIsRunic: + * @code: UCS code point + * + * Check whether the character is part of Runic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsRunic(int code) { + return((code >= 0x16A0) && (code <= 0x16FF)); +} + +/** + * xmlUCSIsSinhala: + * @code: UCS code point + * + * Check whether the character is part of Sinhala UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSinhala(int code) { + return((code >= 0x0D80) && (code <= 0x0DFF)); +} + +/** + * xmlUCSIsSmallFormVariants: + * @code: UCS code point + * + * Check whether the character is part of SmallFormVariants UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSmallFormVariants(int code) { + return((code >= 0xFE50) && (code <= 0xFE6F)); +} + +/** + * xmlUCSIsSpacingModifierLetters: + * @code: UCS code point + * + * Check whether the character is part of SpacingModifierLetters UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSpacingModifierLetters(int code) { + return((code >= 0x02B0) && (code <= 0x02FF)); +} + +/** + * xmlUCSIsSpecials: + * @code: UCS code point + * + * Check whether the character is part of Specials UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSpecials(int code) { + return((code >= 0xFFF0) && (code <= 0xFFFD)); +} + +/** + * xmlUCSIsSuperscriptsandSubscripts: + * @code: UCS code point + * + * Check whether the character is part of SuperscriptsandSubscripts UCS Block + * + * Returns 1 if 
true 0 otherwise + */ +int +xmlUCSIsSuperscriptsandSubscripts(int code) { + return((code >= 0x2070) && (code <= 0x209F)); +} + +/** + * xmlUCSIsSyriac: + * @code: UCS code point + * + * Check whether the character is part of Syriac UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSyriac(int code) { + return((code >= 0x0700) && (code <= 0x074F)); +} + +/** + * xmlUCSIsTags: + * @code: UCS code point + * + * Check whether the character is part of Tags UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTags(int code) { + return((code >= 0xE0000) && (code <= 0xE007F)); +} + +/** + * xmlUCSIsTamil: + * @code: UCS code point + * + * Check whether the character is part of Tamil UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTamil(int code) { + return((code >= 0x0B80) && (code <= 0x0BFF)); +} + +/** + * xmlUCSIsTelugu: + * @code: UCS code point + * + * Check whether the character is part of Telugu UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTelugu(int code) { + return((code >= 0x0C00) && (code <= 0x0C7F)); +} + +/** + * xmlUCSIsThaana: + * @code: UCS code point + * + * Check whether the character is part of Thaana UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsThaana(int code) { + return((code >= 0x0780) && (code <= 0x07BF)); +} + +/** + * xmlUCSIsThai: + * @code: UCS code point + * + * Check whether the character is part of Thai UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsThai(int code) { + return((code >= 0x0E00) && (code <= 0x0E7F)); +} + +/** + * xmlUCSIsTibetan: + * @code: UCS code point + * + * Check whether the character is part of Tibetan UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTibetan(int code) { + return((code >= 0x0F00) && (code <= 0x0FFF)); +} + +/** + * xmlUCSIsUnifiedCanadianAboriginalSyllabics: + * @code: UCS code point + * + * Check whether the character is part of UnifiedCanadianAboriginalSyllabics UCS Block + * 
+ * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) { + return((code >= 0x1400) && (code <= 0x167F)); +} + +/** + * xmlUCSIsYiRadicals: + * @code: UCS code point + * + * Check whether the character is part of YiRadicals UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsYiRadicals(int code) { + return((code >= 0xA490) && (code <= 0xA4CF)); +} + +/** + * xmlUCSIsYiSyllables: + * @code: UCS code point + * + * Check whether the character is part of YiSyllables UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsYiSyllables(int code) { + return((code >= 0xA000) && (code <= 0xA48F)); +} + +/** + * xmlUCSIsBlock: + * @code: UCS code point + * @block: UCS block name + * + * Check whether the caracter is part of the UCS Block + * + * Returns 1 if true, 0 if false and -1 on unknown block + */ +int +xmlUCSIsBlock(int code, const char *block) { + if (!strcmp(block, "AlphabeticPresentationForms")) + return(xmlUCSIsAlphabeticPresentationForms(code)); + if (!strcmp(block, "Arabic")) + return(xmlUCSIsArabic(code)); + if (!strcmp(block, "ArabicPresentationForms-A")) + return(xmlUCSIsArabicPresentationFormsA(code)); + if (!strcmp(block, "ArabicPresentationForms-B")) + return(xmlUCSIsArabicPresentationFormsB(code)); + if (!strcmp(block, "Armenian")) + return(xmlUCSIsArmenian(code)); + if (!strcmp(block, "Arrows")) + return(xmlUCSIsArrows(code)); + if (!strcmp(block, "BasicLatin")) + return(xmlUCSIsBasicLatin(code)); + if (!strcmp(block, "Bengali")) + return(xmlUCSIsBengali(code)); + if (!strcmp(block, "BlockElements")) + return(xmlUCSIsBlockElements(code)); + if (!strcmp(block, "Bopomofo")) + return(xmlUCSIsBopomofo(code)); + if (!strcmp(block, "BopomofoExtended")) + return(xmlUCSIsBopomofoExtended(code)); + if (!strcmp(block, "BoxDrawing")) + return(xmlUCSIsBoxDrawing(code)); + if (!strcmp(block, "BraillePatterns")) + return(xmlUCSIsBraillePatterns(code)); + if (!strcmp(block, "ByzantineMusicalSymbols")) + 
return(xmlUCSIsByzantineMusicalSymbols(code)); + if (!strcmp(block, "CJKCompatibility")) + return(xmlUCSIsCJKCompatibility(code)); + if (!strcmp(block, "CJKCompatibilityForms")) + return(xmlUCSIsCJKCompatibilityForms(code)); + if (!strcmp(block, "CJKCompatibilityIdeographs")) + return(xmlUCSIsCJKCompatibilityIdeographs(code)); + if (!strcmp(block, "CJKCompatibilityIdeographsSupplement")) + return(xmlUCSIsCJKCompatibilityIdeographsSupplement(code)); + if (!strcmp(block, "CJKRadicalsSupplement")) + return(xmlUCSIsCJKRadicalsSupplement(code)); + if (!strcmp(block, "CJKSymbolsandPunctuation")) + return(xmlUCSIsCJKSymbolsandPunctuation(code)); + if (!strcmp(block, "CJKUnifiedIdeographs")) + return(xmlUCSIsCJKUnifiedIdeographs(code)); + if (!strcmp(block, "CJKUnifiedIdeographsExtensionA")) + return(xmlUCSIsCJKUnifiedIdeographsExtensionA(code)); + if (!strcmp(block, "CJKUnifiedIdeographsExtensionB")) + return(xmlUCSIsCJKUnifiedIdeographsExtensionB(code)); + if (!strcmp(block, "Cherokee")) + return(xmlUCSIsCherokee(code)); + if (!strcmp(block, "CombiningDiacriticalMarks")) + return(xmlUCSIsCombiningDiacriticalMarks(code)); + if (!strcmp(block, "CombiningHalfMarks")) + return(xmlUCSIsCombiningHalfMarks(code)); + if (!strcmp(block, "CombiningMarksforSymbols")) + return(xmlUCSIsCombiningMarksforSymbols(code)); + if (!strcmp(block, "ControlPictures")) + return(xmlUCSIsControlPictures(code)); + if (!strcmp(block, "CurrencySymbols")) + return(xmlUCSIsCurrencySymbols(code)); + if (!strcmp(block, "Cyrillic")) + return(xmlUCSIsCyrillic(code)); + if (!strcmp(block, "Deseret")) + return(xmlUCSIsDeseret(code)); + if (!strcmp(block, "Devanagari")) + return(xmlUCSIsDevanagari(code)); + if (!strcmp(block, "Dingbats")) + return(xmlUCSIsDingbats(code)); + if (!strcmp(block, "EnclosedAlphanumerics")) + return(xmlUCSIsEnclosedAlphanumerics(code)); + if (!strcmp(block, "EnclosedCJKLettersandMonths")) + return(xmlUCSIsEnclosedCJKLettersandMonths(code)); + if (!strcmp(block, "Ethiopic")) + 
return(xmlUCSIsEthiopic(code)); + if (!strcmp(block, "GeneralPunctuation")) + return(xmlUCSIsGeneralPunctuation(code)); + if (!strcmp(block, "GeometricShapes")) + return(xmlUCSIsGeometricShapes(code)); + if (!strcmp(block, "Georgian")) + return(xmlUCSIsGeorgian(code)); + if (!strcmp(block, "Gothic")) + return(xmlUCSIsGothic(code)); + if (!strcmp(block, "Greek")) + return(xmlUCSIsGreek(code)); + if (!strcmp(block, "GreekExtended")) + return(xmlUCSIsGreekExtended(code)); + if (!strcmp(block, "Gujarati")) + return(xmlUCSIsGujarati(code)); + if (!strcmp(block, "Gurmukhi")) + return(xmlUCSIsGurmukhi(code)); + if (!strcmp(block, "HalfwidthandFullwidthForms")) + return(xmlUCSIsHalfwidthandFullwidthForms(code)); + if (!strcmp(block, "HangulCompatibilityJamo")) + return(xmlUCSIsHangulCompatibilityJamo(code)); + if (!strcmp(block, "HangulJamo")) + return(xmlUCSIsHangulJamo(code)); + if (!strcmp(block, "HangulSyllables")) + return(xmlUCSIsHangulSyllables(code)); + if (!strcmp(block, "Hebrew")) + return(xmlUCSIsHebrew(code)); + if (!strcmp(block, "HighPrivateUseSurrogates")) + return(xmlUCSIsHighPrivateUseSurrogates(code)); + if (!strcmp(block, "HighSurrogates")) + return(xmlUCSIsHighSurrogates(code)); + if (!strcmp(block, "Hiragana")) + return(xmlUCSIsHiragana(code)); + if (!strcmp(block, "IPAExtensions")) + return(xmlUCSIsIPAExtensions(code)); + if (!strcmp(block, "IdeographicDescriptionCharacters")) + return(xmlUCSIsIdeographicDescriptionCharacters(code)); + if (!strcmp(block, "Kanbun")) + return(xmlUCSIsKanbun(code)); + if (!strcmp(block, "KangxiRadicals")) + return(xmlUCSIsKangxiRadicals(code)); + if (!strcmp(block, "Kannada")) + return(xmlUCSIsKannada(code)); + if (!strcmp(block, "Katakana")) + return(xmlUCSIsKatakana(code)); + if (!strcmp(block, "Khmer")) + return(xmlUCSIsKhmer(code)); + if (!strcmp(block, "Lao")) + return(xmlUCSIsLao(code)); + if (!strcmp(block, "Latin-1Supplement")) + return(xmlUCSIsLatin1Supplement(code)); + if (!strcmp(block, "LatinExtended-A")) + 
return(xmlUCSIsLatinExtendedA(code)); + if (!strcmp(block, "LatinExtended-B")) + return(xmlUCSIsLatinExtendedB(code)); + if (!strcmp(block, "LatinExtendedAdditional")) + return(xmlUCSIsLatinExtendedAdditional(code)); + if (!strcmp(block, "LetterlikeSymbols")) + return(xmlUCSIsLetterlikeSymbols(code)); + if (!strcmp(block, "LowSurrogates")) + return(xmlUCSIsLowSurrogates(code)); + if (!strcmp(block, "Malayalam")) + return(xmlUCSIsMalayalam(code)); + if (!strcmp(block, "MathematicalAlphanumericSymbols")) + return(xmlUCSIsMathematicalAlphanumericSymbols(code)); + if (!strcmp(block, "MathematicalOperators")) + return(xmlUCSIsMathematicalOperators(code)); + if (!strcmp(block, "MiscellaneousSymbols")) + return(xmlUCSIsMiscellaneousSymbols(code)); + if (!strcmp(block, "MiscellaneousTechnical")) + return(xmlUCSIsMiscellaneousTechnical(code)); + if (!strcmp(block, "Mongolian")) + return(xmlUCSIsMongolian(code)); + if (!strcmp(block, "MusicalSymbols")) + return(xmlUCSIsMusicalSymbols(code)); + if (!strcmp(block, "Myanmar")) + return(xmlUCSIsMyanmar(code)); + if (!strcmp(block, "NumberForms")) + return(xmlUCSIsNumberForms(code)); + if (!strcmp(block, "Ogham")) + return(xmlUCSIsOgham(code)); + if (!strcmp(block, "OldItalic")) + return(xmlUCSIsOldItalic(code)); + if (!strcmp(block, "OpticalCharacterRecognition")) + return(xmlUCSIsOpticalCharacterRecognition(code)); + if (!strcmp(block, "Oriya")) + return(xmlUCSIsOriya(code)); + if (!strcmp(block, "PrivateUse")) + return(xmlUCSIsPrivateUse(code)); + if (!strcmp(block, "Runic")) + return(xmlUCSIsRunic(code)); + if (!strcmp(block, "Sinhala")) + return(xmlUCSIsSinhala(code)); + if (!strcmp(block, "SmallFormVariants")) + return(xmlUCSIsSmallFormVariants(code)); + if (!strcmp(block, "SpacingModifierLetters")) + return(xmlUCSIsSpacingModifierLetters(code)); + if (!strcmp(block, "Specials")) + return(xmlUCSIsSpecials(code)); + if (!strcmp(block, "SuperscriptsandSubscripts")) + return(xmlUCSIsSuperscriptsandSubscripts(code)); + if 
(!strcmp(block, "Syriac")) + return(xmlUCSIsSyriac(code)); + if (!strcmp(block, "Tags")) + return(xmlUCSIsTags(code)); + if (!strcmp(block, "Tamil")) + return(xmlUCSIsTamil(code)); + if (!strcmp(block, "Telugu")) + return(xmlUCSIsTelugu(code)); + if (!strcmp(block, "Thaana")) + return(xmlUCSIsThaana(code)); + if (!strcmp(block, "Thai")) + return(xmlUCSIsThai(code)); + if (!strcmp(block, "Tibetan")) + return(xmlUCSIsTibetan(code)); + if (!strcmp(block, "UnifiedCanadianAboriginalSyllabics")) + return(xmlUCSIsUnifiedCanadianAboriginalSyllabics(code)); + if (!strcmp(block, "YiRadicals")) + return(xmlUCSIsYiRadicals(code)); + if (!strcmp(block, "YiSyllables")) + return(xmlUCSIsYiSyllables(code)); + return(-1); +} + +/** + * xmlUCSIsCatC: + * @code: UCS code point + * + * Check whether the character is part of C UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatC(int code) { + return(((code >= 0x0) && (code <= 0x1f)) || + ((code >= 0x7f) && (code <= 0x9f)) || + (code == 0x70f) || + ((code >= 0x180b) && (code <= 0x180e)) || + ((code >= 0x200c) && (code <= 0x200f)) || + ((code >= 0x202a) && (code <= 0x202e)) || + ((code >= 0x206a) && (code <= 0x206f)) || + (code == 0xd800) || + ((code >= 0xdb7f) && (code <= 0xdb80)) || + ((code >= 0xdbff) && (code <= 0xdc00)) || + ((code >= 0xdfff) && (code <= 0xe000)) || + (code == 0xf8ff) || + (code == 0xfeff) || + ((code >= 0xfff9) && (code <= 0xfffb)) || + ((code >= 0x1d173) && (code <= 0x1d17a)) || + (code == 0xe0001) || + ((code >= 0xe0020) && (code <= 0xe007f)) || + (code == 0xf0000) || + (code == 0xffffd) || + (code == 0x100000) || + (code == 0x10fffd)); +} + +/** + * xmlUCSIsCatCc: + * @code: UCS code point + * + * Check whether the character is part of Cc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCc(int code) { + return(((code >= 0x0) && (code <= 0x1f)) || + ((code >= 0x7f) && (code <= 0x9f))); +} + +/** + * xmlUCSIsCatCf: + * @code: UCS code point + * + * Check whether the 
character is part of Cf UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCf(int code) { + return((code == 0x70f) || + ((code >= 0x180b) && (code <= 0x180e)) || + ((code >= 0x200c) && (code <= 0x200f)) || + ((code >= 0x202a) && (code <= 0x202e)) || + ((code >= 0x206a) && (code <= 0x206f)) || + (code == 0xfeff) || + ((code >= 0xfff9) && (code <= 0xfffb)) || + ((code >= 0x1d173) && (code <= 0x1d17a)) || + (code == 0xe0001) || + ((code >= 0xe0020) && (code <= 0xe007f))); +} + +/** + * xmlUCSIsCatCo: + * @code: UCS code point + * + * Check whether the character is part of Co UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCo(int code) { + return((code == 0xe000) || + (code == 0xf8ff) || + (code == 0xf0000) || + (code == 0xffffd) || + (code == 0x100000) || + (code == 0x10fffd)); +} + +/** + * xmlUCSIsCatCs: + * @code: UCS code point + * + * Check whether the character is part of Cs UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCs(int code) { + return((code == 0xd800) || + ((code >= 0xdb7f) && (code <= 0xdb80)) || + ((code >= 0xdbff) && (code <= 0xdc00)) || + (code == 0xdfff)); +} + +/** + * xmlUCSIsCatL: + * @code: UCS code point + * + * Check whether the character is part of L UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatL(int code) { + return(((code >= 0x41) && (code <= 0x5a)) || + ((code >= 0x61) && (code <= 0x7a)) || + (code == 0xaa) || + (code == 0xb5) || + (code == 0xba) || + ((code >= 0xc0) && (code <= 0xd6)) || + ((code >= 0xd8) && (code <= 0xf6)) || + ((code >= 0xf8) && (code <= 0x21f)) || + ((code >= 0x222) && (code <= 0x233)) || + ((code >= 0x250) && (code <= 0x2ad)) || + ((code >= 0x2b0) && (code <= 0x2b8)) || + ((code >= 0x2bb) && (code <= 0x2c1)) || + ((code >= 0x2d0) && (code <= 0x2d1)) || + ((code >= 0x2e0) && (code <= 0x2e4)) || + (code == 0x2ee) || + (code == 0x37a) || + (code == 0x386) || + ((code >= 0x388) && (code <= 0x38a)) || + (code == 0x38c) || 
+ ((code >= 0x38e) && (code <= 0x3a1)) || + ((code >= 0x3a3) && (code <= 0x3ce)) || + ((code >= 0x3d0) && (code <= 0x3d7)) || + ((code >= 0x3da) && (code <= 0x3f5)) || + ((code >= 0x400) && (code <= 0x481)) || + ((code >= 0x48c) && (code <= 0x4c4)) || + ((code >= 0x4c7) && (code <= 0x4c8)) || + ((code >= 0x4cb) && (code <= 0x4cc)) || + ((code >= 0x4d0) && (code <= 0x4f5)) || + ((code >= 0x4f8) && (code <= 0x4f9)) || + ((code >= 0x531) && (code <= 0x556)) || + (code == 0x559) || + ((code >= 0x561) && (code <= 0x587)) || + ((code >= 0x5d0) && (code <= 0x5ea)) || + ((code >= 0x5f0) && (code <= 0x5f2)) || + ((code >= 0x621) && (code <= 0x63a)) || + ((code >= 0x640) && (code <= 0x64a)) || + ((code >= 0x671) && (code <= 0x6d3)) || + (code == 0x6d5) || + ((code >= 0x6e5) && (code <= 0x6e6)) || + ((code >= 0x6fa) && (code <= 0x6fc)) || + (code == 0x710) || + ((code >= 0x712) && (code <= 0x72c)) || + ((code >= 0x780) && (code <= 0x7a5)) || + ((code >= 0x905) && (code <= 0x939)) || + (code == 0x93d) || + (code == 0x950) || + ((code >= 0x958) && (code <= 0x961)) || + ((code >= 0x985) && (code <= 0x98c)) || + ((code >= 0x98f) && (code <= 0x990)) || + ((code >= 0x993) && (code <= 0x9a8)) || + ((code >= 0x9aa) && (code <= 0x9b0)) || + (code == 0x9b2) || + ((code >= 0x9b6) && (code <= 0x9b9)) || + ((code >= 0x9dc) && (code <= 0x9dd)) || + ((code >= 0x9df) && (code <= 0x9e1)) || + ((code >= 0x9f0) && (code <= 0x9f1)) || + ((code >= 0xa05) && (code <= 0xa0a)) || + ((code >= 0xa0f) && (code <= 0xa10)) || + ((code >= 0xa13) && (code <= 0xa28)) || + ((code >= 0xa2a) && (code <= 0xa30)) || + ((code >= 0xa32) && (code <= 0xa33)) || + ((code >= 0xa35) && (code <= 0xa36)) || + ((code >= 0xa38) && (code <= 0xa39)) || + ((code >= 0xa59) && (code <= 0xa5c)) || + (code == 0xa5e) || + ((code >= 0xa72) && (code <= 0xa74)) || + ((code >= 0xa85) && (code <= 0xa8b)) || + (code == 0xa8d) || + ((code >= 0xa8f) && (code <= 0xa91)) || + ((code >= 0xa93) && (code <= 0xaa8)) || + ((code >= 0xaaa) && 
(code <= 0xab0)) || + ((code >= 0xab2) && (code <= 0xab3)) || + ((code >= 0xab5) && (code <= 0xab9)) || + (code == 0xabd) || + (code == 0xad0) || + (code == 0xae0) || + ((code >= 0xb05) && (code <= 0xb0c)) || + ((code >= 0xb0f) && (code <= 0xb10)) || + ((code >= 0xb13) && (code <= 0xb28)) || + ((code >= 0xb2a) && (code <= 0xb30)) || + ((code >= 0xb32) && (code <= 0xb33)) || + ((code >= 0xb36) && (code <= 0xb39)) || + (code == 0xb3d) || + ((code >= 0xb5c) && (code <= 0xb5d)) || + ((code >= 0xb5f) && (code <= 0xb61)) || + ((code >= 0xb85) && (code <= 0xb8a)) || + ((code >= 0xb8e) && (code <= 0xb90)) || + ((code >= 0xb92) && (code <= 0xb95)) || + ((code >= 0xb99) && (code <= 0xb9a)) || + (code == 0xb9c) || + ((code >= 0xb9e) && (code <= 0xb9f)) || + ((code >= 0xba3) && (code <= 0xba4)) || + ((code >= 0xba8) && (code <= 0xbaa)) || + ((code >= 0xbae) && (code <= 0xbb5)) || + ((code >= 0xbb7) && (code <= 0xbb9)) || + ((code >= 0xc05) && (code <= 0xc0c)) || + ((code >= 0xc0e) && (code <= 0xc10)) || + ((code >= 0xc12) && (code <= 0xc28)) || + ((code >= 0xc2a) && (code <= 0xc33)) || + ((code >= 0xc35) && (code <= 0xc39)) || + ((code >= 0xc60) && (code <= 0xc61)) || + ((code >= 0xc85) && (code <= 0xc8c)) || + ((code >= 0xc8e) && (code <= 0xc90)) || + ((code >= 0xc92) && (code <= 0xca8)) || + ((code >= 0xcaa) && (code <= 0xcb3)) || + ((code >= 0xcb5) && (code <= 0xcb9)) || + (code == 0xcde) || + ((code >= 0xce0) && (code <= 0xce1)) || + ((code >= 0xd05) && (code <= 0xd0c)) || + ((code >= 0xd0e) && (code <= 0xd10)) || + ((code >= 0xd12) && (code <= 0xd28)) || + ((code >= 0xd2a) && (code <= 0xd39)) || + ((code >= 0xd60) && (code <= 0xd61)) || + ((code >= 0xd85) && (code <= 0xd96)) || + ((code >= 0xd9a) && (code <= 0xdb1)) || + ((code >= 0xdb3) && (code <= 0xdbb)) || + (code == 0xdbd) || + ((code >= 0xdc0) && (code <= 0xdc6)) || + ((code >= 0xe01) && (code <= 0xe30)) || + ((code >= 0xe32) && (code <= 0xe33)) || + ((code >= 0xe40) && (code <= 0xe46)) || + ((code >= 0xe81) && 
(code <= 0xe82)) || + (code == 0xe84) || + ((code >= 0xe87) && (code <= 0xe88)) || + (code == 0xe8a) || + (code == 0xe8d) || + ((code >= 0xe94) && (code <= 0xe97)) || + ((code >= 0xe99) && (code <= 0xe9f)) || + ((code >= 0xea1) && (code <= 0xea3)) || + (code == 0xea5) || + (code == 0xea7) || + ((code >= 0xeaa) && (code <= 0xeab)) || + ((code >= 0xead) && (code <= 0xeb0)) || + ((code >= 0xeb2) && (code <= 0xeb3)) || + (code == 0xebd) || + ((code >= 0xec0) && (code <= 0xec4)) || + (code == 0xec6) || + ((code >= 0xedc) && (code <= 0xedd)) || + (code == 0xf00) || + ((code >= 0xf40) && (code <= 0xf47)) || + ((code >= 0xf49) && (code <= 0xf6a)) || + ((code >= 0xf88) && (code <= 0xf8b)) || + ((code >= 0x1000) && (code <= 0x1021)) || + ((code >= 0x1023) && (code <= 0x1027)) || + ((code >= 0x1029) && (code <= 0x102a)) || + ((code >= 0x1050) && (code <= 0x1055)) || + ((code >= 0x10a0) && (code <= 0x10c5)) || + ((code >= 0x10d0) && (code <= 0x10f6)) || + ((code >= 0x1100) && (code <= 0x1159)) || + ((code >= 0x115f) && (code <= 0x11a2)) || + ((code >= 0x11a8) && (code <= 0x11f9)) || + ((code >= 0x1200) && (code <= 0x1206)) || + ((code >= 0x1208) && (code <= 0x1246)) || + (code == 0x1248) || + ((code >= 0x124a) && (code <= 0x124d)) || + ((code >= 0x1250) && (code <= 0x1256)) || + (code == 0x1258) || + ((code >= 0x125a) && (code <= 0x125d)) || + ((code >= 0x1260) && (code <= 0x1286)) || + (code == 0x1288) || + ((code >= 0x128a) && (code <= 0x128d)) || + ((code >= 0x1290) && (code <= 0x12ae)) || + (code == 0x12b0) || + ((code >= 0x12b2) && (code <= 0x12b5)) || + ((code >= 0x12b8) && (code <= 0x12be)) || + (code == 0x12c0) || + ((code >= 0x12c2) && (code <= 0x12c5)) || + ((code >= 0x12c8) && (code <= 0x12ce)) || + ((code >= 0x12d0) && (code <= 0x12d6)) || + ((code >= 0x12d8) && (code <= 0x12ee)) || + ((code >= 0x12f0) && (code <= 0x130e)) || + (code == 0x1310) || + ((code >= 0x1312) && (code <= 0x1315)) || + ((code >= 0x1318) && (code <= 0x131e)) || + ((code >= 0x1320) && (code <= 
0x1346)) || + ((code >= 0x1348) && (code <= 0x135a)) || + ((code >= 0x13a0) && (code <= 0x13f4)) || + ((code >= 0x1401) && (code <= 0x166c)) || + ((code >= 0x166f) && (code <= 0x1676)) || + ((code >= 0x1681) && (code <= 0x169a)) || + ((code >= 0x16a0) && (code <= 0x16ea)) || + ((code >= 0x1780) && (code <= 0x17b3)) || + ((code >= 0x1820) && (code <= 0x1877)) || + ((code >= 0x1880) && (code <= 0x18a8)) || + ((code >= 0x1e00) && (code <= 0x1e9b)) || + ((code >= 0x1ea0) && (code <= 0x1ef9)) || + ((code >= 0x1f00) && (code <= 0x1f15)) || + ((code >= 0x1f18) && (code <= 0x1f1d)) || + ((code >= 0x1f20) && (code <= 0x1f45)) || + ((code >= 0x1f48) && (code <= 0x1f4d)) || + ((code >= 0x1f50) && (code <= 0x1f57)) || + (code == 0x1f59) || + (code == 0x1f5b) || + (code == 0x1f5d) || + ((code >= 0x1f5f) && (code <= 0x1f7d)) || + ((code >= 0x1f80) && (code <= 0x1fb4)) || + ((code >= 0x1fb6) && (code <= 0x1fbc)) || + (code == 0x1fbe) || + ((code >= 0x1fc2) && (code <= 0x1fc4)) || + ((code >= 0x1fc6) && (code <= 0x1fcc)) || + ((code >= 0x1fd0) && (code <= 0x1fd3)) || + ((code >= 0x1fd6) && (code <= 0x1fdb)) || + ((code >= 0x1fe0) && (code <= 0x1fec)) || + ((code >= 0x1ff2) && (code <= 0x1ff4)) || + ((code >= 0x1ff6) && (code <= 0x1ffc)) || + (code == 0x207f) || + (code == 0x2102) || + (code == 0x2107) || + ((code >= 0x210a) && (code <= 0x2113)) || + (code == 0x2115) || + ((code >= 0x2119) && (code <= 0x211d)) || + (code == 0x2124) || + (code == 0x2126) || + (code == 0x2128) || + ((code >= 0x212a) && (code <= 0x212d)) || + ((code >= 0x212f) && (code <= 0x2131)) || + ((code >= 0x2133) && (code <= 0x2139)) || + ((code >= 0x3005) && (code <= 0x3006)) || + ((code >= 0x3031) && (code <= 0x3035)) || + ((code >= 0x3041) && (code <= 0x3094)) || + ((code >= 0x309d) && (code <= 0x309e)) || + ((code >= 0x30a1) && (code <= 0x30fa)) || + ((code >= 0x30fc) && (code <= 0x30fe)) || + ((code >= 0x3105) && (code <= 0x312c)) || + ((code >= 0x3131) && (code <= 0x318e)) || + ((code >= 0x31a0) && (code 
<= 0x31b7)) || + (code == 0x3400) || + (code == 0x4db5) || + (code == 0x4e00) || + (code == 0x9fa5) || + ((code >= 0xa000) && (code <= 0xa48c)) || + (code == 0xac00) || + (code == 0xd7a3) || + ((code >= 0xf900) && (code <= 0xfa2d)) || + ((code >= 0xfb00) && (code <= 0xfb06)) || + ((code >= 0xfb13) && (code <= 0xfb17)) || + (code == 0xfb1d) || + ((code >= 0xfb1f) && (code <= 0xfb28)) || + ((code >= 0xfb2a) && (code <= 0xfb36)) || + ((code >= 0xfb38) && (code <= 0xfb3c)) || + (code == 0xfb3e) || + ((code >= 0xfb40) && (code <= 0xfb41)) || + ((code >= 0xfb43) && (code <= 0xfb44)) || + ((code >= 0xfb46) && (code <= 0xfbb1)) || + ((code >= 0xfbd3) && (code <= 0xfd3d)) || + ((code >= 0xfd50) && (code <= 0xfd8f)) || + ((code >= 0xfd92) && (code <= 0xfdc7)) || + ((code >= 0xfdf0) && (code <= 0xfdfb)) || + ((code >= 0xfe70) && (code <= 0xfe72)) || + (code == 0xfe74) || + ((code >= 0xfe76) && (code <= 0xfefc)) || + ((code >= 0xff21) && (code <= 0xff3a)) || + ((code >= 0xff41) && (code <= 0xff5a)) || + ((code >= 0xff66) && (code <= 0xffbe)) || + ((code >= 0xffc2) && (code <= 0xffc7)) || + ((code >= 0xffca) && (code <= 0xffcf)) || + ((code >= 0xffd2) && (code <= 0xffd7)) || + ((code >= 0xffda) && (code <= 0xffdc)) || + ((code >= 0x10300) && (code <= 0x1031e)) || + ((code >= 0x10330) && (code <= 0x10349)) || + ((code >= 0x10400) && (code <= 0x10425)) || + ((code >= 0x10428) && (code <= 0x1044d)) || + ((code >= 0x1d400) && (code <= 0x1d454)) || + ((code >= 0x1d456) && (code <= 0x1d49c)) || + ((code >= 0x1d49e) && (code <= 0x1d49f)) || + (code == 0x1d4a2) || + ((code >= 0x1d4a5) && (code <= 0x1d4a6)) || + ((code >= 0x1d4a9) && (code <= 0x1d4ac)) || + ((code >= 0x1d4ae) && (code <= 0x1d4b9)) || + (code == 0x1d4bb) || + ((code >= 0x1d4bd) && (code <= 0x1d4c0)) || + ((code >= 0x1d4c2) && (code <= 0x1d4c3)) || + ((code >= 0x1d4c5) && (code <= 0x1d505)) || + ((code >= 0x1d507) && (code <= 0x1d50a)) || + ((code >= 0x1d50d) && (code <= 0x1d514)) || + ((code >= 0x1d516) && (code <= 
0x1d51c)) || + ((code >= 0x1d51e) && (code <= 0x1d539)) || + ((code >= 0x1d53b) && (code <= 0x1d53e)) || + ((code >= 0x1d540) && (code <= 0x1d544)) || + (code == 0x1d546) || + ((code >= 0x1d54a) && (code <= 0x1d550)) || + ((code >= 0x1d552) && (code <= 0x1d6a3)) || + ((code >= 0x1d6a8) && (code <= 0x1d6c0)) || + ((code >= 0x1d6c2) && (code <= 0x1d6da)) || + ((code >= 0x1d6dc) && (code <= 0x1d6fa)) || + ((code >= 0x1d6fc) && (code <= 0x1d714)) || + ((code >= 0x1d716) && (code <= 0x1d734)) || + ((code >= 0x1d736) && (code <= 0x1d74e)) || + ((code >= 0x1d750) && (code <= 0x1d76e)) || + ((code >= 0x1d770) && (code <= 0x1d788)) || + ((code >= 0x1d78a) && (code <= 0x1d7a8)) || + ((code >= 0x1d7aa) && (code <= 0x1d7c2)) || + ((code >= 0x1d7c4) && (code <= 0x1d7c9)) || + (code == 0x20000) || + (code == 0x2a6d6) || + ((code >= 0x2f800) && (code <= 0x2fa1d))); +} + +/** + * xmlUCSIsCatLl: + * @code: UCS code point + * + * Check whether the character is part of Ll UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLl(int code) { + return(((code >= 0x61) && (code <= 0x7a)) || + (code == 0xaa) || + (code == 0xb5) || + (code == 0xba) || + ((code >= 0xdf) && (code <= 0xf6)) || + ((code >= 0xf8) && (code <= 0xff)) || + (code == 0x101) || + (code == 0x103) || + (code == 0x105) || + (code == 0x107) || + (code == 0x109) || + (code == 0x10b) || + (code == 0x10d) || + (code == 0x10f) || + (code == 0x111) || + (code == 0x113) || + (code == 0x115) || + (code == 0x117) || + (code == 0x119) || + (code == 0x11b) || + (code == 0x11d) || + (code == 0x11f) || + (code == 0x121) || + (code == 0x123) || + (code == 0x125) || + (code == 0x127) || + (code == 0x129) || + (code == 0x12b) || + (code == 0x12d) || + (code == 0x12f) || + (code == 0x131) || + (code == 0x133) || + (code == 0x135) || + ((code >= 0x137) && (code <= 0x138)) || + (code == 0x13a) || + (code == 0x13c) || + (code == 0x13e) || + (code == 0x140) || + (code == 0x142) || + (code == 0x144) || + (code == 0x146) || 
+ ((code >= 0x148) && (code <= 0x149)) || + (code == 0x14b) || + (code == 0x14d) || + (code == 0x14f) || + (code == 0x151) || + (code == 0x153) || + (code == 0x155) || + (code == 0x157) || + (code == 0x159) || + (code == 0x15b) || + (code == 0x15d) || + (code == 0x15f) || + (code == 0x161) || + (code == 0x163) || + (code == 0x165) || + (code == 0x167) || + (code == 0x169) || + (code == 0x16b) || + (code == 0x16d) || + (code == 0x16f) || + (code == 0x171) || + (code == 0x173) || + (code == 0x175) || + (code == 0x177) || + (code == 0x17a) || + (code == 0x17c) || + ((code >= 0x17e) && (code <= 0x180)) || + (code == 0x183) || + (code == 0x185) || + (code == 0x188) || + ((code >= 0x18c) && (code <= 0x18d)) || + (code == 0x192) || + (code == 0x195) || + ((code >= 0x199) && (code <= 0x19b)) || + (code == 0x19e) || + (code == 0x1a1) || + (code == 0x1a3) || + (code == 0x1a5) || + (code == 0x1a8) || + ((code >= 0x1aa) && (code <= 0x1ab)) || + (code == 0x1ad) || + (code == 0x1b0) || + (code == 0x1b4) || + (code == 0x1b6) || + ((code >= 0x1b9) && (code <= 0x1ba)) || + ((code >= 0x1bd) && (code <= 0x1bf)) || + (code == 0x1c6) || + (code == 0x1c9) || + (code == 0x1cc) || + (code == 0x1ce) || + (code == 0x1d0) || + (code == 0x1d2) || + (code == 0x1d4) || + (code == 0x1d6) || + (code == 0x1d8) || + (code == 0x1da) || + ((code >= 0x1dc) && (code <= 0x1dd)) || + (code == 0x1df) || + (code == 0x1e1) || + (code == 0x1e3) || + (code == 0x1e5) || + (code == 0x1e7) || + (code == 0x1e9) || + (code == 0x1eb) || + (code == 0x1ed) || + ((code >= 0x1ef) && (code <= 0x1f0)) || + (code == 0x1f3) || + (code == 0x1f5) || + (code == 0x1f9) || + (code == 0x1fb) || + (code == 0x1fd) || + (code == 0x1ff) || + (code == 0x201) || + (code == 0x203) || + (code == 0x205) || + (code == 0x207) || + (code == 0x209) || + (code == 0x20b) || + (code == 0x20d) || + (code == 0x20f) || + (code == 0x211) || + (code == 0x213) || + (code == 0x215) || + (code == 0x217) || + (code == 0x219) || + (code == 0x21b) || + 
(code == 0x21d) || + (code == 0x21f) || + (code == 0x223) || + (code == 0x225) || + (code == 0x227) || + (code == 0x229) || + (code == 0x22b) || + (code == 0x22d) || + (code == 0x22f) || + (code == 0x231) || + (code == 0x233) || + ((code >= 0x250) && (code <= 0x2ad)) || + (code == 0x390) || + ((code >= 0x3ac) && (code <= 0x3ce)) || + ((code >= 0x3d0) && (code <= 0x3d1)) || + ((code >= 0x3d5) && (code <= 0x3d7)) || + (code == 0x3db) || + (code == 0x3dd) || + (code == 0x3df) || + (code == 0x3e1) || + (code == 0x3e3) || + (code == 0x3e5) || + (code == 0x3e7) || + (code == 0x3e9) || + (code == 0x3eb) || + (code == 0x3ed) || + ((code >= 0x3ef) && (code <= 0x3f3)) || + (code == 0x3f5) || + ((code >= 0x430) && (code <= 0x45f)) || + (code == 0x461) || + (code == 0x463) || + (code == 0x465) || + (code == 0x467) || + (code == 0x469) || + (code == 0x46b) || + (code == 0x46d) || + (code == 0x46f) || + (code == 0x471) || + (code == 0x473) || + (code == 0x475) || + (code == 0x477) || + (code == 0x479) || + (code == 0x47b) || + (code == 0x47d) || + (code == 0x47f) || + (code == 0x481) || + (code == 0x48d) || + (code == 0x48f) || + (code == 0x491) || + (code == 0x493) || + (code == 0x495) || + (code == 0x497) || + (code == 0x499) || + (code == 0x49b) || + (code == 0x49d) || + (code == 0x49f) || + (code == 0x4a1) || + (code == 0x4a3) || + (code == 0x4a5) || + (code == 0x4a7) || + (code == 0x4a9) || + (code == 0x4ab) || + (code == 0x4ad) || + (code == 0x4af) || + (code == 0x4b1) || + (code == 0x4b3) || + (code == 0x4b5) || + (code == 0x4b7) || + (code == 0x4b9) || + (code == 0x4bb) || + (code == 0x4bd) || + (code == 0x4bf) || + (code == 0x4c2) || + (code == 0x4c4) || + (code == 0x4c8) || + (code == 0x4cc) || + (code == 0x4d1) || + (code == 0x4d3) || + (code == 0x4d5) || + (code == 0x4d7) || + (code == 0x4d9) || + (code == 0x4db) || + (code == 0x4dd) || + (code == 0x4df) || + (code == 0x4e1) || + (code == 0x4e3) || + (code == 0x4e5) || + (code == 0x4e7) || + (code == 0x4e9) || + 
(code == 0x4eb) || + (code == 0x4ed) || + (code == 0x4ef) || + (code == 0x4f1) || + (code == 0x4f3) || + (code == 0x4f5) || + (code == 0x4f9) || + ((code >= 0x561) && (code <= 0x587)) || + (code == 0x1e01) || + (code == 0x1e03) || + (code == 0x1e05) || + (code == 0x1e07) || + (code == 0x1e09) || + (code == 0x1e0b) || + (code == 0x1e0d) || + (code == 0x1e0f) || + (code == 0x1e11) || + (code == 0x1e13) || + (code == 0x1e15) || + (code == 0x1e17) || + (code == 0x1e19) || + (code == 0x1e1b) || + (code == 0x1e1d) || + (code == 0x1e1f) || + (code == 0x1e21) || + (code == 0x1e23) || + (code == 0x1e25) || + (code == 0x1e27) || + (code == 0x1e29) || + (code == 0x1e2b) || + (code == 0x1e2d) || + (code == 0x1e2f) || + (code == 0x1e31) || + (code == 0x1e33) || + (code == 0x1e35) || + (code == 0x1e37) || + (code == 0x1e39) || + (code == 0x1e3b) || + (code == 0x1e3d) || + (code == 0x1e3f) || + (code == 0x1e41) || + (code == 0x1e43) || + (code == 0x1e45) || + (code == 0x1e47) || + (code == 0x1e49) || + (code == 0x1e4b) || + (code == 0x1e4d) || + (code == 0x1e4f) || + (code == 0x1e51) || + (code == 0x1e53) || + (code == 0x1e55) || + (code == 0x1e57) || + (code == 0x1e59) || + (code == 0x1e5b) || + (code == 0x1e5d) || + (code == 0x1e5f) || + (code == 0x1e61) || + (code == 0x1e63) || + (code == 0x1e65) || + (code == 0x1e67) || + (code == 0x1e69) || + (code == 0x1e6b) || + (code == 0x1e6d) || + (code == 0x1e6f) || + (code == 0x1e71) || + (code == 0x1e73) || + (code == 0x1e75) || + (code == 0x1e77) || + (code == 0x1e79) || + (code == 0x1e7b) || + (code == 0x1e7d) || + (code == 0x1e7f) || + (code == 0x1e81) || + (code == 0x1e83) || + (code == 0x1e85) || + (code == 0x1e87) || + (code == 0x1e89) || + (code == 0x1e8b) || + (code == 0x1e8d) || + (code == 0x1e8f) || + (code == 0x1e91) || + (code == 0x1e93) || + ((code >= 0x1e95) && (code <= 0x1e9b)) || + (code == 0x1ea1) || + (code == 0x1ea3) || + (code == 0x1ea5) || + (code == 0x1ea7) || + (code == 0x1ea9) || + (code == 0x1eab) || + (code 
== 0x1ead) || + (code == 0x1eaf) || + (code == 0x1eb1) || + (code == 0x1eb3) || + (code == 0x1eb5) || + (code == 0x1eb7) || + (code == 0x1eb9) || + (code == 0x1ebb) || + (code == 0x1ebd) || + (code == 0x1ebf) || + (code == 0x1ec1) || + (code == 0x1ec3) || + (code == 0x1ec5) || + (code == 0x1ec7) || + (code == 0x1ec9) || + (code == 0x1ecb) || + (code == 0x1ecd) || + (code == 0x1ecf) || + (code == 0x1ed1) || + (code == 0x1ed3) || + (code == 0x1ed5) || + (code == 0x1ed7) || + (code == 0x1ed9) || + (code == 0x1edb) || + (code == 0x1edd) || + (code == 0x1edf) || + (code == 0x1ee1) || + (code == 0x1ee3) || + (code == 0x1ee5) || + (code == 0x1ee7) || + (code == 0x1ee9) || + (code == 0x1eeb) || + (code == 0x1eed) || + (code == 0x1eef) || + (code == 0x1ef1) || + (code == 0x1ef3) || + (code == 0x1ef5) || + (code == 0x1ef7) || + (code == 0x1ef9) || + ((code >= 0x1f00) && (code <= 0x1f07)) || + ((code >= 0x1f10) && (code <= 0x1f15)) || + ((code >= 0x1f20) && (code <= 0x1f27)) || + ((code >= 0x1f30) && (code <= 0x1f37)) || + ((code >= 0x1f40) && (code <= 0x1f45)) || + ((code >= 0x1f50) && (code <= 0x1f57)) || + ((code >= 0x1f60) && (code <= 0x1f67)) || + ((code >= 0x1f70) && (code <= 0x1f7d)) || + ((code >= 0x1f80) && (code <= 0x1f87)) || + ((code >= 0x1f90) && (code <= 0x1f97)) || + ((code >= 0x1fa0) && (code <= 0x1fa7)) || + ((code >= 0x1fb0) && (code <= 0x1fb4)) || + ((code >= 0x1fb6) && (code <= 0x1fb7)) || + (code == 0x1fbe) || + ((code >= 0x1fc2) && (code <= 0x1fc4)) || + ((code >= 0x1fc6) && (code <= 0x1fc7)) || + ((code >= 0x1fd0) && (code <= 0x1fd3)) || + ((code >= 0x1fd6) && (code <= 0x1fd7)) || + ((code >= 0x1fe0) && (code <= 0x1fe7)) || + ((code >= 0x1ff2) && (code <= 0x1ff4)) || + ((code >= 0x1ff6) && (code <= 0x1ff7)) || + (code == 0x207f) || + (code == 0x210a) || + ((code >= 0x210e) && (code <= 0x210f)) || + (code == 0x2113) || + (code == 0x212f) || + (code == 0x2134) || + (code == 0x2139) || + ((code >= 0xfb00) && (code <= 0xfb06)) || + ((code >= 0xfb13) && 
(code <= 0xfb17)) || + ((code >= 0xff41) && (code <= 0xff5a)) || + ((code >= 0x10428) && (code <= 0x1044d)) || + ((code >= 0x1d41a) && (code <= 0x1d433)) || + ((code >= 0x1d44e) && (code <= 0x1d454)) || + ((code >= 0x1d456) && (code <= 0x1d467)) || + ((code >= 0x1d482) && (code <= 0x1d49b)) || + ((code >= 0x1d4b6) && (code <= 0x1d4b9)) || + (code == 0x1d4bb) || + ((code >= 0x1d4bd) && (code <= 0x1d4c0)) || + ((code >= 0x1d4c2) && (code <= 0x1d4c3)) || + ((code >= 0x1d4c5) && (code <= 0x1d4cf)) || + ((code >= 0x1d4ea) && (code <= 0x1d503)) || + ((code >= 0x1d51e) && (code <= 0x1d537)) || + ((code >= 0x1d552) && (code <= 0x1d56b)) || + ((code >= 0x1d586) && (code <= 0x1d59f)) || + ((code >= 0x1d5ba) && (code <= 0x1d5d3)) || + ((code >= 0x1d5ee) && (code <= 0x1d607)) || + ((code >= 0x1d622) && (code <= 0x1d63b)) || + ((code >= 0x1d656) && (code <= 0x1d66f)) || + ((code >= 0x1d68a) && (code <= 0x1d6a3)) || + ((code >= 0x1d6c2) && (code <= 0x1d6da)) || + ((code >= 0x1d6dc) && (code <= 0x1d6e1)) || + ((code >= 0x1d6fc) && (code <= 0x1d714)) || + ((code >= 0x1d716) && (code <= 0x1d71b)) || + ((code >= 0x1d736) && (code <= 0x1d74e)) || + ((code >= 0x1d750) && (code <= 0x1d755)) || + ((code >= 0x1d770) && (code <= 0x1d788)) || + ((code >= 0x1d78a) && (code <= 0x1d78f)) || + ((code >= 0x1d7aa) && (code <= 0x1d7c2)) || + ((code >= 0x1d7c4) && (code <= 0x1d7c9))); +} + +/** + * xmlUCSIsCatLm: + * @code: UCS code point + * + * Check whether the character is part of Lm UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLm(int code) { + return(((code >= 0x2b0) && (code <= 0x2b8)) || + ((code >= 0x2bb) && (code <= 0x2c1)) || + ((code >= 0x2d0) && (code <= 0x2d1)) || + ((code >= 0x2e0) && (code <= 0x2e4)) || + (code == 0x2ee) || + (code == 0x37a) || + (code == 0x559) || + (code == 0x640) || + ((code >= 0x6e5) && (code <= 0x6e6)) || + (code == 0xe46) || + (code == 0xec6) || + (code == 0x1843) || + (code == 0x3005) || + ((code >= 0x3031) && (code <= 0x3035)) || 
+ ((code >= 0x309d) && (code <= 0x309e)) || + ((code >= 0x30fc) && (code <= 0x30fe)) || + (code == 0xff70) || + ((code >= 0xff9e) && (code <= 0xff9f))); +} + +/** + * xmlUCSIsCatLo: + * @code: UCS code point + * + * Check whether the character is part of Lo UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLo(int code) { + return((code == 0x1bb) || + ((code >= 0x1c0) && (code <= 0x1c3)) || + ((code >= 0x5d0) && (code <= 0x5ea)) || + ((code >= 0x5f0) && (code <= 0x5f2)) || + ((code >= 0x621) && (code <= 0x63a)) || + ((code >= 0x641) && (code <= 0x64a)) || + ((code >= 0x671) && (code <= 0x6d3)) || + (code == 0x6d5) || + ((code >= 0x6fa) && (code <= 0x6fc)) || + (code == 0x710) || + ((code >= 0x712) && (code <= 0x72c)) || + ((code >= 0x780) && (code <= 0x7a5)) || + ((code >= 0x905) && (code <= 0x939)) || + (code == 0x93d) || + (code == 0x950) || + ((code >= 0x958) && (code <= 0x961)) || + ((code >= 0x985) && (code <= 0x98c)) || + ((code >= 0x98f) && (code <= 0x990)) || + ((code >= 0x993) && (code <= 0x9a8)) || + ((code >= 0x9aa) && (code <= 0x9b0)) || + (code == 0x9b2) || + ((code >= 0x9b6) && (code <= 0x9b9)) || + ((code >= 0x9dc) && (code <= 0x9dd)) || + ((code >= 0x9df) && (code <= 0x9e1)) || + ((code >= 0x9f0) && (code <= 0x9f1)) || + ((code >= 0xa05) && (code <= 0xa0a)) || + ((code >= 0xa0f) && (code <= 0xa10)) || + ((code >= 0xa13) && (code <= 0xa28)) || + ((code >= 0xa2a) && (code <= 0xa30)) || + ((code >= 0xa32) && (code <= 0xa33)) || + ((code >= 0xa35) && (code <= 0xa36)) || + ((code >= 0xa38) && (code <= 0xa39)) || + ((code >= 0xa59) && (code <= 0xa5c)) || + (code == 0xa5e) || + ((code >= 0xa72) && (code <= 0xa74)) || + ((code >= 0xa85) && (code <= 0xa8b)) || + (code == 0xa8d) || + ((code >= 0xa8f) && (code <= 0xa91)) || + ((code >= 0xa93) && (code <= 0xaa8)) || + ((code >= 0xaaa) && (code <= 0xab0)) || + ((code >= 0xab2) && (code <= 0xab3)) || + ((code >= 0xab5) && (code <= 0xab9)) || + (code == 0xabd) || + (code == 0xad0) || + (code 
== 0xae0) || + ((code >= 0xb05) && (code <= 0xb0c)) || + ((code >= 0xb0f) && (code <= 0xb10)) || + ((code >= 0xb13) && (code <= 0xb28)) || + ((code >= 0xb2a) && (code <= 0xb30)) || + ((code >= 0xb32) && (code <= 0xb33)) || + ((code >= 0xb36) && (code <= 0xb39)) || + (code == 0xb3d) || + ((code >= 0xb5c) && (code <= 0xb5d)) || + ((code >= 0xb5f) && (code <= 0xb61)) || + ((code >= 0xb85) && (code <= 0xb8a)) || + ((code >= 0xb8e) && (code <= 0xb90)) || + ((code >= 0xb92) && (code <= 0xb95)) || + ((code >= 0xb99) && (code <= 0xb9a)) || + (code == 0xb9c) || + ((code >= 0xb9e) && (code <= 0xb9f)) || + ((code >= 0xba3) && (code <= 0xba4)) || + ((code >= 0xba8) && (code <= 0xbaa)) || + ((code >= 0xbae) && (code <= 0xbb5)) || + ((code >= 0xbb7) && (code <= 0xbb9)) || + ((code >= 0xc05) && (code <= 0xc0c)) || + ((code >= 0xc0e) && (code <= 0xc10)) || + ((code >= 0xc12) && (code <= 0xc28)) || + ((code >= 0xc2a) && (code <= 0xc33)) || + ((code >= 0xc35) && (code <= 0xc39)) || + ((code >= 0xc60) && (code <= 0xc61)) || + ((code >= 0xc85) && (code <= 0xc8c)) || + ((code >= 0xc8e) && (code <= 0xc90)) || + ((code >= 0xc92) && (code <= 0xca8)) || + ((code >= 0xcaa) && (code <= 0xcb3)) || + ((code >= 0xcb5) && (code <= 0xcb9)) || + (code == 0xcde) || + ((code >= 0xce0) && (code <= 0xce1)) || + ((code >= 0xd05) && (code <= 0xd0c)) || + ((code >= 0xd0e) && (code <= 0xd10)) || + ((code >= 0xd12) && (code <= 0xd28)) || + ((code >= 0xd2a) && (code <= 0xd39)) || + ((code >= 0xd60) && (code <= 0xd61)) || + ((code >= 0xd85) && (code <= 0xd96)) || + ((code >= 0xd9a) && (code <= 0xdb1)) || + ((code >= 0xdb3) && (code <= 0xdbb)) || + (code == 0xdbd) || + ((code >= 0xdc0) && (code <= 0xdc6)) || + ((code >= 0xe01) && (code <= 0xe30)) || + ((code >= 0xe32) && (code <= 0xe33)) || + ((code >= 0xe40) && (code <= 0xe45)) || + ((code >= 0xe81) && (code <= 0xe82)) || + (code == 0xe84) || + ((code >= 0xe87) && (code <= 0xe88)) || + (code == 0xe8a) || + (code == 0xe8d) || + ((code >= 0xe94) && (code <= 
0xe97)) || + ((code >= 0xe99) && (code <= 0xe9f)) || + ((code >= 0xea1) && (code <= 0xea3)) || + (code == 0xea5) || + (code == 0xea7) || + ((code >= 0xeaa) && (code <= 0xeab)) || + ((code >= 0xead) && (code <= 0xeb0)) || + ((code >= 0xeb2) && (code <= 0xeb3)) || + (code == 0xebd) || + ((code >= 0xec0) && (code <= 0xec4)) || + ((code >= 0xedc) && (code <= 0xedd)) || + (code == 0xf00) || + ((code >= 0xf40) && (code <= 0xf47)) || + ((code >= 0xf49) && (code <= 0xf6a)) || + ((code >= 0xf88) && (code <= 0xf8b)) || + ((code >= 0x1000) && (code <= 0x1021)) || + ((code >= 0x1023) && (code <= 0x1027)) || + ((code >= 0x1029) && (code <= 0x102a)) || + ((code >= 0x1050) && (code <= 0x1055)) || + ((code >= 0x10d0) && (code <= 0x10f6)) || + ((code >= 0x1100) && (code <= 0x1159)) || + ((code >= 0x115f) && (code <= 0x11a2)) || + ((code >= 0x11a8) && (code <= 0x11f9)) || + ((code >= 0x1200) && (code <= 0x1206)) || + ((code >= 0x1208) && (code <= 0x1246)) || + (code == 0x1248) || + ((code >= 0x124a) && (code <= 0x124d)) || + ((code >= 0x1250) && (code <= 0x1256)) || + (code == 0x1258) || + ((code >= 0x125a) && (code <= 0x125d)) || + ((code >= 0x1260) && (code <= 0x1286)) || + (code == 0x1288) || + ((code >= 0x128a) && (code <= 0x128d)) || + ((code >= 0x1290) && (code <= 0x12ae)) || + (code == 0x12b0) || + ((code >= 0x12b2) && (code <= 0x12b5)) || + ((code >= 0x12b8) && (code <= 0x12be)) || + (code == 0x12c0) || + ((code >= 0x12c2) && (code <= 0x12c5)) || + ((code >= 0x12c8) && (code <= 0x12ce)) || + ((code >= 0x12d0) && (code <= 0x12d6)) || + ((code >= 0x12d8) && (code <= 0x12ee)) || + ((code >= 0x12f0) && (code <= 0x130e)) || + (code == 0x1310) || + ((code >= 0x1312) && (code <= 0x1315)) || + ((code >= 0x1318) && (code <= 0x131e)) || + ((code >= 0x1320) && (code <= 0x1346)) || + ((code >= 0x1348) && (code <= 0x135a)) || + ((code >= 0x13a0) && (code <= 0x13f4)) || + ((code >= 0x1401) && (code <= 0x166c)) || + ((code >= 0x166f) && (code <= 0x1676)) || + ((code >= 0x1681) && (code <= 
0x169a)) || + ((code >= 0x16a0) && (code <= 0x16ea)) || + ((code >= 0x1780) && (code <= 0x17b3)) || + ((code >= 0x1820) && (code <= 0x1842)) || + ((code >= 0x1844) && (code <= 0x1877)) || + ((code >= 0x1880) && (code <= 0x18a8)) || + ((code >= 0x2135) && (code <= 0x2138)) || + (code == 0x3006) || + ((code >= 0x3041) && (code <= 0x3094)) || + ((code >= 0x30a1) && (code <= 0x30fa)) || + ((code >= 0x3105) && (code <= 0x312c)) || + ((code >= 0x3131) && (code <= 0x318e)) || + ((code >= 0x31a0) && (code <= 0x31b7)) || + (code == 0x3400) || + (code == 0x4db5) || + (code == 0x4e00) || + (code == 0x9fa5) || + ((code >= 0xa000) && (code <= 0xa48c)) || + (code == 0xac00) || + (code == 0xd7a3) || + ((code >= 0xf900) && (code <= 0xfa2d)) || + (code == 0xfb1d) || + ((code >= 0xfb1f) && (code <= 0xfb28)) || + ((code >= 0xfb2a) && (code <= 0xfb36)) || + ((code >= 0xfb38) && (code <= 0xfb3c)) || + (code == 0xfb3e) || + ((code >= 0xfb40) && (code <= 0xfb41)) || + ((code >= 0xfb43) && (code <= 0xfb44)) || + ((code >= 0xfb46) && (code <= 0xfbb1)) || + ((code >= 0xfbd3) && (code <= 0xfd3d)) || + ((code >= 0xfd50) && (code <= 0xfd8f)) || + ((code >= 0xfd92) && (code <= 0xfdc7)) || + ((code >= 0xfdf0) && (code <= 0xfdfb)) || + ((code >= 0xfe70) && (code <= 0xfe72)) || + (code == 0xfe74) || + ((code >= 0xfe76) && (code <= 0xfefc)) || + ((code >= 0xff66) && (code <= 0xff6f)) || + ((code >= 0xff71) && (code <= 0xff9d)) || + ((code >= 0xffa0) && (code <= 0xffbe)) || + ((code >= 0xffc2) && (code <= 0xffc7)) || + ((code >= 0xffca) && (code <= 0xffcf)) || + ((code >= 0xffd2) && (code <= 0xffd7)) || + ((code >= 0xffda) && (code <= 0xffdc)) || + ((code >= 0x10300) && (code <= 0x1031e)) || + ((code >= 0x10330) && (code <= 0x10349)) || + (code == 0x20000) || + (code == 0x2a6d6) || + ((code >= 0x2f800) && (code <= 0x2fa1d))); +} + +/** + * xmlUCSIsCatLt: + * @code: UCS code point + * + * Check whether the character is part of Lt UCS Category + * + * Returns 1 if true 0 otherwise + */ +int 
+xmlUCSIsCatLt(int code) { + return((code == 0x1c5) || + (code == 0x1c8) || + (code == 0x1cb) || + (code == 0x1f2) || + ((code >= 0x1f88) && (code <= 0x1f8f)) || + ((code >= 0x1f98) && (code <= 0x1f9f)) || + ((code >= 0x1fa8) && (code <= 0x1faf)) || + (code == 0x1fbc) || + (code == 0x1fcc) || + (code == 0x1ffc)); +} + +/** + * xmlUCSIsCatLu: + * @code: UCS code point + * + * Check whether the character is part of Lu UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLu(int code) { + return(((code >= 0x41) && (code <= 0x5a)) || + ((code >= 0xc0) && (code <= 0xd6)) || + ((code >= 0xd8) && (code <= 0xde)) || + (code == 0x100) || + (code == 0x102) || + (code == 0x104) || + (code == 0x106) || + (code == 0x108) || + (code == 0x10a) || + (code == 0x10c) || + (code == 0x10e) || + (code == 0x110) || + (code == 0x112) || + (code == 0x114) || + (code == 0x116) || + (code == 0x118) || + (code == 0x11a) || + (code == 0x11c) || + (code == 0x11e) || + (code == 0x120) || + (code == 0x122) || + (code == 0x124) || + (code == 0x126) || + (code == 0x128) || + (code == 0x12a) || + (code == 0x12c) || + (code == 0x12e) || + (code == 0x130) || + (code == 0x132) || + (code == 0x134) || + (code == 0x136) || + (code == 0x139) || + (code == 0x13b) || + (code == 0x13d) || + (code == 0x13f) || + (code == 0x141) || + (code == 0x143) || + (code == 0x145) || + (code == 0x147) || + (code == 0x14a) || + (code == 0x14c) || + (code == 0x14e) || + (code == 0x150) || + (code == 0x152) || + (code == 0x154) || + (code == 0x156) || + (code == 0x158) || + (code == 0x15a) || + (code == 0x15c) || + (code == 0x15e) || + (code == 0x160) || + (code == 0x162) || + (code == 0x164) || + (code == 0x166) || + (code == 0x168) || + (code == 0x16a) || + (code == 0x16c) || + (code == 0x16e) || + (code == 0x170) || + (code == 0x172) || + (code == 0x174) || + (code == 0x176) || + ((code >= 0x178) && (code <= 0x179)) || + (code == 0x17b) || + (code == 0x17d) || + ((code >= 0x181) && (code <= 0x182)) || 
+ (code == 0x184) || + ((code >= 0x186) && (code <= 0x187)) || + ((code >= 0x189) && (code <= 0x18b)) || + ((code >= 0x18e) && (code <= 0x191)) || + ((code >= 0x193) && (code <= 0x194)) || + ((code >= 0x196) && (code <= 0x198)) || + ((code >= 0x19c) && (code <= 0x19d)) || + ((code >= 0x19f) && (code <= 0x1a0)) || + (code == 0x1a2) || + (code == 0x1a4) || + ((code >= 0x1a6) && (code <= 0x1a7)) || + (code == 0x1a9) || + (code == 0x1ac) || + ((code >= 0x1ae) && (code <= 0x1af)) || + ((code >= 0x1b1) && (code <= 0x1b3)) || + (code == 0x1b5) || + ((code >= 0x1b7) && (code <= 0x1b8)) || + (code == 0x1bc) || + (code == 0x1c4) || + (code == 0x1c7) || + (code == 0x1ca) || + (code == 0x1cd) || + (code == 0x1cf) || + (code == 0x1d1) || + (code == 0x1d3) || + (code == 0x1d5) || + (code == 0x1d7) || + (code == 0x1d9) || + (code == 0x1db) || + (code == 0x1de) || + (code == 0x1e0) || + (code == 0x1e2) || + (code == 0x1e4) || + (code == 0x1e6) || + (code == 0x1e8) || + (code == 0x1ea) || + (code == 0x1ec) || + (code == 0x1ee) || + (code == 0x1f1) || + (code == 0x1f4) || + ((code >= 0x1f6) && (code <= 0x1f8)) || + (code == 0x1fa) || + (code == 0x1fc) || + (code == 0x1fe) || + (code == 0x200) || + (code == 0x202) || + (code == 0x204) || + (code == 0x206) || + (code == 0x208) || + (code == 0x20a) || + (code == 0x20c) || + (code == 0x20e) || + (code == 0x210) || + (code == 0x212) || + (code == 0x214) || + (code == 0x216) || + (code == 0x218) || + (code == 0x21a) || + (code == 0x21c) || + (code == 0x21e) || + (code == 0x222) || + (code == 0x224) || + (code == 0x226) || + (code == 0x228) || + (code == 0x22a) || + (code == 0x22c) || + (code == 0x22e) || + (code == 0x230) || + (code == 0x232) || + (code == 0x386) || + ((code >= 0x388) && (code <= 0x38a)) || + (code == 0x38c) || + ((code >= 0x38e) && (code <= 0x38f)) || + ((code >= 0x391) && (code <= 0x3a1)) || + ((code >= 0x3a3) && (code <= 0x3ab)) || + ((code >= 0x3d2) && (code <= 0x3d4)) || + (code == 0x3da) || + (code == 0x3dc) || + 
(code == 0x3de) || + (code == 0x3e0) || + (code == 0x3e2) || + (code == 0x3e4) || + (code == 0x3e6) || + (code == 0x3e8) || + (code == 0x3ea) || + (code == 0x3ec) || + (code == 0x3ee) || + (code == 0x3f4) || + ((code >= 0x400) && (code <= 0x42f)) || + (code == 0x460) || + (code == 0x462) || + (code == 0x464) || + (code == 0x466) || + (code == 0x468) || + (code == 0x46a) || + (code == 0x46c) || + (code == 0x46e) || + (code == 0x470) || + (code == 0x472) || + (code == 0x474) || + (code == 0x476) || + (code == 0x478) || + (code == 0x47a) || + (code == 0x47c) || + (code == 0x47e) || + (code == 0x480) || + (code == 0x48c) || + (code == 0x48e) || + (code == 0x490) || + (code == 0x492) || + (code == 0x494) || + (code == 0x496) || + (code == 0x498) || + (code == 0x49a) || + (code == 0x49c) || + (code == 0x49e) || + (code == 0x4a0) || + (code == 0x4a2) || + (code == 0x4a4) || + (code == 0x4a6) || + (code == 0x4a8) || + (code == 0x4aa) || + (code == 0x4ac) || + (code == 0x4ae) || + (code == 0x4b0) || + (code == 0x4b2) || + (code == 0x4b4) || + (code == 0x4b6) || + (code == 0x4b8) || + (code == 0x4ba) || + (code == 0x4bc) || + (code == 0x4be) || + ((code >= 0x4c0) && (code <= 0x4c1)) || + (code == 0x4c3) || + (code == 0x4c7) || + (code == 0x4cb) || + (code == 0x4d0) || + (code == 0x4d2) || + (code == 0x4d4) || + (code == 0x4d6) || + (code == 0x4d8) || + (code == 0x4da) || + (code == 0x4dc) || + (code == 0x4de) || + (code == 0x4e0) || + (code == 0x4e2) || + (code == 0x4e4) || + (code == 0x4e6) || + (code == 0x4e8) || + (code == 0x4ea) || + (code == 0x4ec) || + (code == 0x4ee) || + (code == 0x4f0) || + (code == 0x4f2) || + (code == 0x4f4) || + (code == 0x4f8) || + ((code >= 0x531) && (code <= 0x556)) || + ((code >= 0x10a0) && (code <= 0x10c5)) || + (code == 0x1e00) || + (code == 0x1e02) || + (code == 0x1e04) || + (code == 0x1e06) || + (code == 0x1e08) || + (code == 0x1e0a) || + (code == 0x1e0c) || + (code == 0x1e0e) || + (code == 0x1e10) || + (code == 0x1e12) || + (code == 
0x1e14) || + (code == 0x1e16) || + (code == 0x1e18) || + (code == 0x1e1a) || + (code == 0x1e1c) || + (code == 0x1e1e) || + (code == 0x1e20) || + (code == 0x1e22) || + (code == 0x1e24) || + (code == 0x1e26) || + (code == 0x1e28) || + (code == 0x1e2a) || + (code == 0x1e2c) || + (code == 0x1e2e) || + (code == 0x1e30) || + (code == 0x1e32) || + (code == 0x1e34) || + (code == 0x1e36) || + (code == 0x1e38) || + (code == 0x1e3a) || + (code == 0x1e3c) || + (code == 0x1e3e) || + (code == 0x1e40) || + (code == 0x1e42) || + (code == 0x1e44) || + (code == 0x1e46) || + (code == 0x1e48) || + (code == 0x1e4a) || + (code == 0x1e4c) || + (code == 0x1e4e) || + (code == 0x1e50) || + (code == 0x1e52) || + (code == 0x1e54) || + (code == 0x1e56) || + (code == 0x1e58) || + (code == 0x1e5a) || + (code == 0x1e5c) || + (code == 0x1e5e) || + (code == 0x1e60) || + (code == 0x1e62) || + (code == 0x1e64) || + (code == 0x1e66) || + (code == 0x1e68) || + (code == 0x1e6a) || + (code == 0x1e6c) || + (code == 0x1e6e) || + (code == 0x1e70) || + (code == 0x1e72) || + (code == 0x1e74) || + (code == 0x1e76) || + (code == 0x1e78) || + (code == 0x1e7a) || + (code == 0x1e7c) || + (code == 0x1e7e) || + (code == 0x1e80) || + (code == 0x1e82) || + (code == 0x1e84) || + (code == 0x1e86) || + (code == 0x1e88) || + (code == 0x1e8a) || + (code == 0x1e8c) || + (code == 0x1e8e) || + (code == 0x1e90) || + (code == 0x1e92) || + (code == 0x1e94) || + (code == 0x1ea0) || + (code == 0x1ea2) || + (code == 0x1ea4) || + (code == 0x1ea6) || + (code == 0x1ea8) || + (code == 0x1eaa) || + (code == 0x1eac) || + (code == 0x1eae) || + (code == 0x1eb0) || + (code == 0x1eb2) || + (code == 0x1eb4) || + (code == 0x1eb6) || + (code == 0x1eb8) || + (code == 0x1eba) || + (code == 0x1ebc) || + (code == 0x1ebe) || + (code == 0x1ec0) || + (code == 0x1ec2) || + (code == 0x1ec4) || + (code == 0x1ec6) || + (code == 0x1ec8) || + (code == 0x1eca) || + (code == 0x1ecc) || + (code == 0x1ece) || + (code == 0x1ed0) || + (code == 0x1ed2) || + (code 
== 0x1ed4) || + (code == 0x1ed6) || + (code == 0x1ed8) || + (code == 0x1eda) || + (code == 0x1edc) || + (code == 0x1ede) || + (code == 0x1ee0) || + (code == 0x1ee2) || + (code == 0x1ee4) || + (code == 0x1ee6) || + (code == 0x1ee8) || + (code == 0x1eea) || + (code == 0x1eec) || + (code == 0x1eee) || + (code == 0x1ef0) || + (code == 0x1ef2) || + (code == 0x1ef4) || + (code == 0x1ef6) || + (code == 0x1ef8) || + ((code >= 0x1f08) && (code <= 0x1f0f)) || + ((code >= 0x1f18) && (code <= 0x1f1d)) || + ((code >= 0x1f28) && (code <= 0x1f2f)) || + ((code >= 0x1f38) && (code <= 0x1f3f)) || + ((code >= 0x1f48) && (code <= 0x1f4d)) || + (code == 0x1f59) || + (code == 0x1f5b) || + (code == 0x1f5d) || + (code == 0x1f5f) || + ((code >= 0x1f68) && (code <= 0x1f6f)) || + ((code >= 0x1fb8) && (code <= 0x1fbb)) || + ((code >= 0x1fc8) && (code <= 0x1fcb)) || + ((code >= 0x1fd8) && (code <= 0x1fdb)) || + ((code >= 0x1fe8) && (code <= 0x1fec)) || + ((code >= 0x1ff8) && (code <= 0x1ffb)) || + (code == 0x2102) || + (code == 0x2107) || + ((code >= 0x210b) && (code <= 0x210d)) || + ((code >= 0x2110) && (code <= 0x2112)) || + (code == 0x2115) || + ((code >= 0x2119) && (code <= 0x211d)) || + (code == 0x2124) || + (code == 0x2126) || + (code == 0x2128) || + ((code >= 0x212a) && (code <= 0x212d)) || + ((code >= 0x2130) && (code <= 0x2131)) || + (code == 0x2133) || + ((code >= 0xff21) && (code <= 0xff3a)) || + ((code >= 0x10400) && (code <= 0x10425)) || + ((code >= 0x1d400) && (code <= 0x1d419)) || + ((code >= 0x1d434) && (code <= 0x1d44d)) || + ((code >= 0x1d468) && (code <= 0x1d481)) || + (code == 0x1d49c) || + ((code >= 0x1d49e) && (code <= 0x1d49f)) || + (code == 0x1d4a2) || + ((code >= 0x1d4a5) && (code <= 0x1d4a6)) || + ((code >= 0x1d4a9) && (code <= 0x1d4ac)) || + ((code >= 0x1d4ae) && (code <= 0x1d4b5)) || + ((code >= 0x1d4d0) && (code <= 0x1d4e9)) || + ((code >= 0x1d504) && (code <= 0x1d505)) || + ((code >= 0x1d507) && (code <= 0x1d50a)) || + ((code >= 0x1d50d) && (code <= 0x1d514)) || + 
((code >= 0x1d516) && (code <= 0x1d51c)) || + ((code >= 0x1d538) && (code <= 0x1d539)) || + ((code >= 0x1d53b) && (code <= 0x1d53e)) || + ((code >= 0x1d540) && (code <= 0x1d544)) || + (code == 0x1d546) || + ((code >= 0x1d54a) && (code <= 0x1d550)) || + ((code >= 0x1d56c) && (code <= 0x1d585)) || + ((code >= 0x1d5a0) && (code <= 0x1d5b9)) || + ((code >= 0x1d5d4) && (code <= 0x1d5ed)) || + ((code >= 0x1d608) && (code <= 0x1d621)) || + ((code >= 0x1d63c) && (code <= 0x1d655)) || + ((code >= 0x1d670) && (code <= 0x1d689)) || + ((code >= 0x1d6a8) && (code <= 0x1d6c0)) || + ((code >= 0x1d6e2) && (code <= 0x1d6fa)) || + ((code >= 0x1d71c) && (code <= 0x1d734)) || + ((code >= 0x1d756) && (code <= 0x1d76e)) || + ((code >= 0x1d790) && (code <= 0x1d7a8))); +} + +/** + * xmlUCSIsCatM: + * @code: UCS code point + * + * Check whether the character is part of M UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatM(int code) { + return(((code >= 0x300) && (code <= 0x34e)) || + ((code >= 0x360) && (code <= 0x362)) || + ((code >= 0x483) && (code <= 0x486)) || + ((code >= 0x488) && (code <= 0x489)) || + ((code >= 0x591) && (code <= 0x5a1)) || + ((code >= 0x5a3) && (code <= 0x5b9)) || + ((code >= 0x5bb) && (code <= 0x5bd)) || + (code == 0x5bf) || + ((code >= 0x5c1) && (code <= 0x5c2)) || + (code == 0x5c4) || + ((code >= 0x64b) && (code <= 0x655)) || + (code == 0x670) || + ((code >= 0x6d6) && (code <= 0x6e4)) || + ((code >= 0x6e7) && (code <= 0x6e8)) || + ((code >= 0x6ea) && (code <= 0x6ed)) || + (code == 0x711) || + ((code >= 0x730) && (code <= 0x74a)) || + ((code >= 0x7a6) && (code <= 0x7b0)) || + ((code >= 0x901) && (code <= 0x903)) || + (code == 0x93c) || + ((code >= 0x93e) && (code <= 0x94d)) || + ((code >= 0x951) && (code <= 0x954)) || + ((code >= 0x962) && (code <= 0x963)) || + ((code >= 0x981) && (code <= 0x983)) || + (code == 0x9bc) || + ((code >= 0x9be) && (code <= 0x9c4)) || + ((code >= 0x9c7) && (code <= 0x9c8)) || + ((code >= 0x9cb) && (code <= 0x9cd)) 
|| + (code == 0x9d7) || + ((code >= 0x9e2) && (code <= 0x9e3)) || + (code == 0xa02) || + (code == 0xa3c) || + ((code >= 0xa3e) && (code <= 0xa42)) || + ((code >= 0xa47) && (code <= 0xa48)) || + ((code >= 0xa4b) && (code <= 0xa4d)) || + ((code >= 0xa70) && (code <= 0xa71)) || + ((code >= 0xa81) && (code <= 0xa83)) || + (code == 0xabc) || + ((code >= 0xabe) && (code <= 0xac5)) || + ((code >= 0xac7) && (code <= 0xac9)) || + ((code >= 0xacb) && (code <= 0xacd)) || + ((code >= 0xb01) && (code <= 0xb03)) || + (code == 0xb3c) || + ((code >= 0xb3e) && (code <= 0xb43)) || + ((code >= 0xb47) && (code <= 0xb48)) || + ((code >= 0xb4b) && (code <= 0xb4d)) || + ((code >= 0xb56) && (code <= 0xb57)) || + ((code >= 0xb82) && (code <= 0xb83)) || + ((code >= 0xbbe) && (code <= 0xbc2)) || + ((code >= 0xbc6) && (code <= 0xbc8)) || + ((code >= 0xbca) && (code <= 0xbcd)) || + (code == 0xbd7) || + ((code >= 0xc01) && (code <= 0xc03)) || + ((code >= 0xc3e) && (code <= 0xc44)) || + ((code >= 0xc46) && (code <= 0xc48)) || + ((code >= 0xc4a) && (code <= 0xc4d)) || + ((code >= 0xc55) && (code <= 0xc56)) || + ((code >= 0xc82) && (code <= 0xc83)) || + ((code >= 0xcbe) && (code <= 0xcc4)) || + ((code >= 0xcc6) && (code <= 0xcc8)) || + ((code >= 0xcca) && (code <= 0xccd)) || + ((code >= 0xcd5) && (code <= 0xcd6)) || + ((code >= 0xd02) && (code <= 0xd03)) || + ((code >= 0xd3e) && (code <= 0xd43)) || + ((code >= 0xd46) && (code <= 0xd48)) || + ((code >= 0xd4a) && (code <= 0xd4d)) || + (code == 0xd57) || + ((code >= 0xd82) && (code <= 0xd83)) || + (code == 0xdca) || + ((code >= 0xdcf) && (code <= 0xdd4)) || + (code == 0xdd6) || + ((code >= 0xdd8) && (code <= 0xddf)) || + ((code >= 0xdf2) && (code <= 0xdf3)) || + (code == 0xe31) || + ((code >= 0xe34) && (code <= 0xe3a)) || + ((code >= 0xe47) && (code <= 0xe4e)) || + (code == 0xeb1) || + ((code >= 0xeb4) && (code <= 0xeb9)) || + ((code >= 0xebb) && (code <= 0xebc)) || + ((code >= 0xec8) && (code <= 0xecd)) || + ((code >= 0xf18) && (code <= 0xf19)) || + 
(code == 0xf35) || + (code == 0xf37) || + (code == 0xf39) || + ((code >= 0xf3e) && (code <= 0xf3f)) || + ((code >= 0xf71) && (code <= 0xf84)) || + ((code >= 0xf86) && (code <= 0xf87)) || + ((code >= 0xf90) && (code <= 0xf97)) || + ((code >= 0xf99) && (code <= 0xfbc)) || + (code == 0xfc6) || + ((code >= 0x102c) && (code <= 0x1032)) || + ((code >= 0x1036) && (code <= 0x1039)) || + ((code >= 0x1056) && (code <= 0x1059)) || + ((code >= 0x17b4) && (code <= 0x17d3)) || + (code == 0x18a9) || + ((code >= 0x20d0) && (code <= 0x20e3)) || + ((code >= 0x302a) && (code <= 0x302f)) || + ((code >= 0x3099) && (code <= 0x309a)) || + (code == 0xfb1e) || + ((code >= 0xfe20) && (code <= 0xfe23)) || + ((code >= 0x1d165) && (code <= 0x1d169)) || + ((code >= 0x1d16d) && (code <= 0x1d172)) || + ((code >= 0x1d17b) && (code <= 0x1d182)) || + ((code >= 0x1d185) && (code <= 0x1d18b)) || + ((code >= 0x1d1aa) && (code <= 0x1d1ad))); +} + +/** + * xmlUCSIsCatMc: + * @code: UCS code point + * + * Check whether the character is part of Mc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMc(int code) { + return((code == 0x903) || + ((code >= 0x93e) && (code <= 0x940)) || + ((code >= 0x949) && (code <= 0x94c)) || + ((code >= 0x982) && (code <= 0x983)) || + ((code >= 0x9be) && (code <= 0x9c0)) || + ((code >= 0x9c7) && (code <= 0x9c8)) || + ((code >= 0x9cb) && (code <= 0x9cc)) || + (code == 0x9d7) || + ((code >= 0xa3e) && (code <= 0xa40)) || + (code == 0xa83) || + ((code >= 0xabe) && (code <= 0xac0)) || + (code == 0xac9) || + ((code >= 0xacb) && (code <= 0xacc)) || + ((code >= 0xb02) && (code <= 0xb03)) || + (code == 0xb3e) || + (code == 0xb40) || + ((code >= 0xb47) && (code <= 0xb48)) || + ((code >= 0xb4b) && (code <= 0xb4c)) || + (code == 0xb57) || + (code == 0xb83) || + ((code >= 0xbbe) && (code <= 0xbbf)) || + ((code >= 0xbc1) && (code <= 0xbc2)) || + ((code >= 0xbc6) && (code <= 0xbc8)) || + ((code >= 0xbca) && (code <= 0xbcc)) || + (code == 0xbd7) || + ((code >= 0xc01) && 
(code <= 0xc03)) || + ((code >= 0xc41) && (code <= 0xc44)) || + ((code >= 0xc82) && (code <= 0xc83)) || + (code == 0xcbe) || + ((code >= 0xcc0) && (code <= 0xcc4)) || + ((code >= 0xcc7) && (code <= 0xcc8)) || + ((code >= 0xcca) && (code <= 0xccb)) || + ((code >= 0xcd5) && (code <= 0xcd6)) || + ((code >= 0xd02) && (code <= 0xd03)) || + ((code >= 0xd3e) && (code <= 0xd40)) || + ((code >= 0xd46) && (code <= 0xd48)) || + ((code >= 0xd4a) && (code <= 0xd4c)) || + (code == 0xd57) || + ((code >= 0xd82) && (code <= 0xd83)) || + ((code >= 0xdcf) && (code <= 0xdd1)) || + ((code >= 0xdd8) && (code <= 0xddf)) || + ((code >= 0xdf2) && (code <= 0xdf3)) || + ((code >= 0xf3e) && (code <= 0xf3f)) || + (code == 0xf7f) || + (code == 0x102c) || + (code == 0x1031) || + (code == 0x1038) || + ((code >= 0x1056) && (code <= 0x1057)) || + ((code >= 0x17b4) && (code <= 0x17b6)) || + ((code >= 0x17be) && (code <= 0x17c5)) || + ((code >= 0x17c7) && (code <= 0x17c8)) || + ((code >= 0x1d165) && (code <= 0x1d166)) || + ((code >= 0x1d16d) && (code <= 0x1d172))); +} + +/** + * xmlUCSIsCatMe: + * @code: UCS code point + * + * Check whether the character is part of Me UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMe(int code) { + return(((code >= 0x488) && (code <= 0x489)) || + ((code >= 0x6dd) && (code <= 0x6de)) || + ((code >= 0x20dd) && (code <= 0x20e0)) || + ((code >= 0x20e2) && (code <= 0x20e3))); +} + +/** + * xmlUCSIsCatMn: + * @code: UCS code point + * + * Check whether the character is part of Mn UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMn(int code) { + return(((code >= 0x300) && (code <= 0x34e)) || + ((code >= 0x360) && (code <= 0x362)) || + ((code >= 0x483) && (code <= 0x486)) || + ((code >= 0x591) && (code <= 0x5a1)) || + ((code >= 0x5a3) && (code <= 0x5b9)) || + ((code >= 0x5bb) && (code <= 0x5bd)) || + (code == 0x5bf) || + ((code >= 0x5c1) && (code <= 0x5c2)) || + (code == 0x5c4) || + ((code >= 0x64b) && (code <= 0x655)) || + 
(code == 0x670) || + ((code >= 0x6d6) && (code <= 0x6dc)) || + ((code >= 0x6df) && (code <= 0x6e4)) || + ((code >= 0x6e7) && (code <= 0x6e8)) || + ((code >= 0x6ea) && (code <= 0x6ed)) || + (code == 0x711) || + ((code >= 0x730) && (code <= 0x74a)) || + ((code >= 0x7a6) && (code <= 0x7b0)) || + ((code >= 0x901) && (code <= 0x902)) || + (code == 0x93c) || + ((code >= 0x941) && (code <= 0x948)) || + (code == 0x94d) || + ((code >= 0x951) && (code <= 0x954)) || + ((code >= 0x962) && (code <= 0x963)) || + (code == 0x981) || + (code == 0x9bc) || + ((code >= 0x9c1) && (code <= 0x9c4)) || + (code == 0x9cd) || + ((code >= 0x9e2) && (code <= 0x9e3)) || + (code == 0xa02) || + (code == 0xa3c) || + ((code >= 0xa41) && (code <= 0xa42)) || + ((code >= 0xa47) && (code <= 0xa48)) || + ((code >= 0xa4b) && (code <= 0xa4d)) || + ((code >= 0xa70) && (code <= 0xa71)) || + ((code >= 0xa81) && (code <= 0xa82)) || + (code == 0xabc) || + ((code >= 0xac1) && (code <= 0xac5)) || + ((code >= 0xac7) && (code <= 0xac8)) || + (code == 0xacd) || + (code == 0xb01) || + (code == 0xb3c) || + (code == 0xb3f) || + ((code >= 0xb41) && (code <= 0xb43)) || + (code == 0xb4d) || + (code == 0xb56) || + (code == 0xb82) || + (code == 0xbc0) || + (code == 0xbcd) || + ((code >= 0xc3e) && (code <= 0xc40)) || + ((code >= 0xc46) && (code <= 0xc48)) || + ((code >= 0xc4a) && (code <= 0xc4d)) || + ((code >= 0xc55) && (code <= 0xc56)) || + (code == 0xcbf) || + (code == 0xcc6) || + ((code >= 0xccc) && (code <= 0xccd)) || + ((code >= 0xd41) && (code <= 0xd43)) || + (code == 0xd4d) || + (code == 0xdca) || + ((code >= 0xdd2) && (code <= 0xdd4)) || + (code == 0xdd6) || + (code == 0xe31) || + ((code >= 0xe34) && (code <= 0xe3a)) || + ((code >= 0xe47) && (code <= 0xe4e)) || + (code == 0xeb1) || + ((code >= 0xeb4) && (code <= 0xeb9)) || + ((code >= 0xebb) && (code <= 0xebc)) || + ((code >= 0xec8) && (code <= 0xecd)) || + ((code >= 0xf18) && (code <= 0xf19)) || + (code == 0xf35) || + (code == 0xf37) || + (code == 0xf39) || + 
((code >= 0xf71) && (code <= 0xf7e)) || + ((code >= 0xf80) && (code <= 0xf84)) || + ((code >= 0xf86) && (code <= 0xf87)) || + ((code >= 0xf90) && (code <= 0xf97)) || + ((code >= 0xf99) && (code <= 0xfbc)) || + (code == 0xfc6) || + ((code >= 0x102d) && (code <= 0x1030)) || + (code == 0x1032) || + ((code >= 0x1036) && (code <= 0x1037)) || + (code == 0x1039) || + ((code >= 0x1058) && (code <= 0x1059)) || + ((code >= 0x17b7) && (code <= 0x17bd)) || + (code == 0x17c6) || + ((code >= 0x17c9) && (code <= 0x17d3)) || + (code == 0x18a9) || + ((code >= 0x20d0) && (code <= 0x20dc)) || + (code == 0x20e1) || + ((code >= 0x302a) && (code <= 0x302f)) || + ((code >= 0x3099) && (code <= 0x309a)) || + (code == 0xfb1e) || + ((code >= 0xfe20) && (code <= 0xfe23)) || + ((code >= 0x1d167) && (code <= 0x1d169)) || + ((code >= 0x1d17b) && (code <= 0x1d182)) || + ((code >= 0x1d185) && (code <= 0x1d18b)) || + ((code >= 0x1d1aa) && (code <= 0x1d1ad))); +} + +/** + * xmlUCSIsCatN: + * @code: UCS code point + * + * Check whether the character is part of N UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatN(int code) { + return(((code >= 0x30) && (code <= 0x39)) || + ((code >= 0xb2) && (code <= 0xb3)) || + (code == 0xb9) || + ((code >= 0xbc) && (code <= 0xbe)) || + ((code >= 0x660) && (code <= 0x669)) || + ((code >= 0x6f0) && (code <= 0x6f9)) || + ((code >= 0x966) && (code <= 0x96f)) || + ((code >= 0x9e6) && (code <= 0x9ef)) || + ((code >= 0x9f4) && (code <= 0x9f9)) || + ((code >= 0xa66) && (code <= 0xa6f)) || + ((code >= 0xae6) && (code <= 0xaef)) || + ((code >= 0xb66) && (code <= 0xb6f)) || + ((code >= 0xbe7) && (code <= 0xbf2)) || + ((code >= 0xc66) && (code <= 0xc6f)) || + ((code >= 0xce6) && (code <= 0xcef)) || + ((code >= 0xd66) && (code <= 0xd6f)) || + ((code >= 0xe50) && (code <= 0xe59)) || + ((code >= 0xed0) && (code <= 0xed9)) || + ((code >= 0xf20) && (code <= 0xf33)) || + ((code >= 0x1040) && (code <= 0x1049)) || + ((code >= 0x1369) && (code <= 0x137c)) || + 
((code >= 0x16ee) && (code <= 0x16f0)) || + ((code >= 0x17e0) && (code <= 0x17e9)) || + ((code >= 0x1810) && (code <= 0x1819)) || + (code == 0x2070) || + ((code >= 0x2074) && (code <= 0x2079)) || + ((code >= 0x2080) && (code <= 0x2089)) || + ((code >= 0x2153) && (code <= 0x2183)) || + ((code >= 0x2460) && (code <= 0x249b)) || + (code == 0x24ea) || + ((code >= 0x2776) && (code <= 0x2793)) || + (code == 0x3007) || + ((code >= 0x3021) && (code <= 0x3029)) || + ((code >= 0x3038) && (code <= 0x303a)) || + ((code >= 0x3192) && (code <= 0x3195)) || + ((code >= 0x3220) && (code <= 0x3229)) || + ((code >= 0x3280) && (code <= 0x3289)) || + ((code >= 0xff10) && (code <= 0xff19)) || + ((code >= 0x10320) && (code <= 0x10323)) || + (code == 0x1034a) || + ((code >= 0x1d7ce) && (code <= 0x1d7ff))); +} + +/** + * xmlUCSIsCatNd: + * @code: UCS code point + * + * Check whether the character is part of Nd UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNd(int code) { + return(((code >= 0x30) && (code <= 0x39)) || + ((code >= 0x660) && (code <= 0x669)) || + ((code >= 0x6f0) && (code <= 0x6f9)) || + ((code >= 0x966) && (code <= 0x96f)) || + ((code >= 0x9e6) && (code <= 0x9ef)) || + ((code >= 0xa66) && (code <= 0xa6f)) || + ((code >= 0xae6) && (code <= 0xaef)) || + ((code >= 0xb66) && (code <= 0xb6f)) || + ((code >= 0xbe7) && (code <= 0xbef)) || + ((code >= 0xc66) && (code <= 0xc6f)) || + ((code >= 0xce6) && (code <= 0xcef)) || + ((code >= 0xd66) && (code <= 0xd6f)) || + ((code >= 0xe50) && (code <= 0xe59)) || + ((code >= 0xed0) && (code <= 0xed9)) || + ((code >= 0xf20) && (code <= 0xf29)) || + ((code >= 0x1040) && (code <= 0x1049)) || + ((code >= 0x1369) && (code <= 0x1371)) || + ((code >= 0x17e0) && (code <= 0x17e9)) || + ((code >= 0x1810) && (code <= 0x1819)) || + ((code >= 0xff10) && (code <= 0xff19)) || + ((code >= 0x1d7ce) && (code <= 0x1d7ff))); +} + +/** + * xmlUCSIsCatNl: + * @code: UCS code point + * + * Check whether the character is part of Nl UCS 
Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNl(int code) { + return(((code >= 0x16ee) && (code <= 0x16f0)) || + ((code >= 0x2160) && (code <= 0x2183)) || + (code == 0x3007) || + ((code >= 0x3021) && (code <= 0x3029)) || + ((code >= 0x3038) && (code <= 0x303a)) || + (code == 0x1034a)); +} + +/** + * xmlUCSIsCatNo: + * @code: UCS code point + * + * Check whether the character is part of No UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNo(int code) { + return(((code >= 0xb2) && (code <= 0xb3)) || + (code == 0xb9) || + ((code >= 0xbc) && (code <= 0xbe)) || + ((code >= 0x9f4) && (code <= 0x9f9)) || + ((code >= 0xbf0) && (code <= 0xbf2)) || + ((code >= 0xf2a) && (code <= 0xf33)) || + ((code >= 0x1372) && (code <= 0x137c)) || + (code == 0x2070) || + ((code >= 0x2074) && (code <= 0x2079)) || + ((code >= 0x2080) && (code <= 0x2089)) || + ((code >= 0x2153) && (code <= 0x215f)) || + ((code >= 0x2460) && (code <= 0x249b)) || + (code == 0x24ea) || + ((code >= 0x2776) && (code <= 0x2793)) || + ((code >= 0x3192) && (code <= 0x3195)) || + ((code >= 0x3220) && (code <= 0x3229)) || + ((code >= 0x3280) && (code <= 0x3289)) || + ((code >= 0x10320) && (code <= 0x10323))); +} + +/** + * xmlUCSIsCatP: + * @code: UCS code point + * + * Check whether the character is part of P UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatP(int code) { + return(((code >= 0x21) && (code <= 0x23)) || + ((code >= 0x25) && (code <= 0x2a)) || + ((code >= 0x2c) && (code <= 0x2f)) || + ((code >= 0x3a) && (code <= 0x3b)) || + ((code >= 0x3f) && (code <= 0x40)) || + ((code >= 0x5b) && (code <= 0x5d)) || + (code == 0x5f) || + (code == 0x7b) || + (code == 0x7d) || + (code == 0xa1) || + (code == 0xab) || + (code == 0xad) || + (code == 0xb7) || + (code == 0xbb) || + (code == 0xbf) || + (code == 0x37e) || + (code == 0x387) || + ((code >= 0x55a) && (code <= 0x55f)) || + ((code >= 0x589) && (code <= 0x58a)) || + (code == 0x5be) || + (code == 
0x5c0) || + (code == 0x5c3) || + ((code >= 0x5f3) && (code <= 0x5f4)) || + (code == 0x60c) || + (code == 0x61b) || + (code == 0x61f) || + ((code >= 0x66a) && (code <= 0x66d)) || + (code == 0x6d4) || + ((code >= 0x700) && (code <= 0x70d)) || + ((code >= 0x964) && (code <= 0x965)) || + (code == 0x970) || + (code == 0xdf4) || + (code == 0xe4f) || + ((code >= 0xe5a) && (code <= 0xe5b)) || + ((code >= 0xf04) && (code <= 0xf12)) || + ((code >= 0xf3a) && (code <= 0xf3d)) || + (code == 0xf85) || + ((code >= 0x104a) && (code <= 0x104f)) || + (code == 0x10fb) || + ((code >= 0x1361) && (code <= 0x1368)) || + ((code >= 0x166d) && (code <= 0x166e)) || + ((code >= 0x169b) && (code <= 0x169c)) || + ((code >= 0x16eb) && (code <= 0x16ed)) || + ((code >= 0x17d4) && (code <= 0x17da)) || + (code == 0x17dc) || + ((code >= 0x1800) && (code <= 0x180a)) || + ((code >= 0x2010) && (code <= 0x2027)) || + ((code >= 0x2030) && (code <= 0x2043)) || + ((code >= 0x2045) && (code <= 0x2046)) || + ((code >= 0x2048) && (code <= 0x204d)) || + ((code >= 0x207d) && (code <= 0x207e)) || + ((code >= 0x208d) && (code <= 0x208e)) || + ((code >= 0x2329) && (code <= 0x232a)) || + ((code >= 0x3001) && (code <= 0x3003)) || + ((code >= 0x3008) && (code <= 0x3011)) || + ((code >= 0x3014) && (code <= 0x301f)) || + (code == 0x3030) || + (code == 0x30fb) || + ((code >= 0xfd3e) && (code <= 0xfd3f)) || + ((code >= 0xfe30) && (code <= 0xfe44)) || + ((code >= 0xfe49) && (code <= 0xfe52)) || + ((code >= 0xfe54) && (code <= 0xfe61)) || + (code == 0xfe63) || + (code == 0xfe68) || + ((code >= 0xfe6a) && (code <= 0xfe6b)) || + ((code >= 0xff01) && (code <= 0xff03)) || + ((code >= 0xff05) && (code <= 0xff0a)) || + ((code >= 0xff0c) && (code <= 0xff0f)) || + ((code >= 0xff1a) && (code <= 0xff1b)) || + ((code >= 0xff1f) && (code <= 0xff20)) || + ((code >= 0xff3b) && (code <= 0xff3d)) || + (code == 0xff3f) || + (code == 0xff5b) || + (code == 0xff5d) || + ((code >= 0xff61) && (code <= 0xff65))); +} + +/** + * xmlUCSIsCatPc: + * 
@code: UCS code point + * + * Check whether the character is part of Pc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPc(int code) { + return((code == 0x5f) || + ((code >= 0x203f) && (code <= 0x2040)) || + (code == 0x30fb) || + ((code >= 0xfe33) && (code <= 0xfe34)) || + ((code >= 0xfe4d) && (code <= 0xfe4f)) || + (code == 0xff3f) || + (code == 0xff65)); +} + +/** + * xmlUCSIsCatPd: + * @code: UCS code point + * + * Check whether the character is part of Pd UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPd(int code) { + return((code == 0x2d) || + (code == 0xad) || + (code == 0x58a) || + (code == 0x1806) || + ((code >= 0x2010) && (code <= 0x2015)) || + (code == 0x301c) || + (code == 0x3030) || + ((code >= 0xfe31) && (code <= 0xfe32)) || + (code == 0xfe58) || + (code == 0xfe63) || + (code == 0xff0d)); +} + +/** + * xmlUCSIsCatPe: + * @code: UCS code point + * + * Check whether the character is part of Pe UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPe(int code) { + return((code == 0x29) || + (code == 0x5d) || + (code == 0x7d) || + (code == 0xf3b) || + (code == 0xf3d) || + (code == 0x169c) || + (code == 0x2046) || + (code == 0x207e) || + (code == 0x208e) || + (code == 0x232a) || + (code == 0x3009) || + (code == 0x300b) || + (code == 0x300d) || + (code == 0x300f) || + (code == 0x3011) || + (code == 0x3015) || + (code == 0x3017) || + (code == 0x3019) || + (code == 0x301b) || + ((code >= 0x301e) && (code <= 0x301f)) || + (code == 0xfd3f) || + (code == 0xfe36) || + (code == 0xfe38) || + (code == 0xfe3a) || + (code == 0xfe3c) || + (code == 0xfe3e) || + (code == 0xfe40) || + (code == 0xfe42) || + (code == 0xfe44) || + (code == 0xfe5a) || + (code == 0xfe5c) || + (code == 0xfe5e) || + (code == 0xff09) || + (code == 0xff3d) || + (code == 0xff5d) || + (code == 0xff63)); +} + +/** + * xmlUCSIsCatPf: + * @code: UCS code point + * + * Check whether the character is part of Pf UCS Category + * + * 
Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPf(int code) { + return((code == 0xbb) || + (code == 0x2019) || + (code == 0x201d) || + (code == 0x203a)); +} + +/** + * xmlUCSIsCatPi: + * @code: UCS code point + * + * Check whether the character is part of Pi UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPi(int code) { + return((code == 0xab) || + (code == 0x2018) || + ((code >= 0x201b) && (code <= 0x201c)) || + (code == 0x201f) || + (code == 0x2039)); +} + +/** + * xmlUCSIsCatPo: + * @code: UCS code point + * + * Check whether the character is part of Po UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPo(int code) { + return(((code >= 0x21) && (code <= 0x23)) || + ((code >= 0x25) && (code <= 0x27)) || + (code == 0x2a) || + (code == 0x2c) || + ((code >= 0x2e) && (code <= 0x2f)) || + ((code >= 0x3a) && (code <= 0x3b)) || + ((code >= 0x3f) && (code <= 0x40)) || + (code == 0x5c) || + (code == 0xa1) || + (code == 0xb7) || + (code == 0xbf) || + (code == 0x37e) || + (code == 0x387) || + ((code >= 0x55a) && (code <= 0x55f)) || + (code == 0x589) || + (code == 0x5be) || + (code == 0x5c0) || + (code == 0x5c3) || + ((code >= 0x5f3) && (code <= 0x5f4)) || + (code == 0x60c) || + (code == 0x61b) || + (code == 0x61f) || + ((code >= 0x66a) && (code <= 0x66d)) || + (code == 0x6d4) || + ((code >= 0x700) && (code <= 0x70d)) || + ((code >= 0x964) && (code <= 0x965)) || + (code == 0x970) || + (code == 0xdf4) || + (code == 0xe4f) || + ((code >= 0xe5a) && (code <= 0xe5b)) || + ((code >= 0xf04) && (code <= 0xf12)) || + (code == 0xf85) || + ((code >= 0x104a) && (code <= 0x104f)) || + (code == 0x10fb) || + ((code >= 0x1361) && (code <= 0x1368)) || + ((code >= 0x166d) && (code <= 0x166e)) || + ((code >= 0x16eb) && (code <= 0x16ed)) || + ((code >= 0x17d4) && (code <= 0x17da)) || + (code == 0x17dc) || + ((code >= 0x1800) && (code <= 0x1805)) || + ((code >= 0x1807) && (code <= 0x180a)) || + ((code >= 0x2016) && (code <= 0x2017)) || + 
((code >= 0x2020) && (code <= 0x2027)) || + ((code >= 0x2030) && (code <= 0x2038)) || + ((code >= 0x203b) && (code <= 0x203e)) || + ((code >= 0x2041) && (code <= 0x2043)) || + ((code >= 0x2048) && (code <= 0x204d)) || + ((code >= 0x3001) && (code <= 0x3003)) || + (code == 0xfe30) || + ((code >= 0xfe49) && (code <= 0xfe4c)) || + ((code >= 0xfe50) && (code <= 0xfe52)) || + ((code >= 0xfe54) && (code <= 0xfe57)) || + ((code >= 0xfe5f) && (code <= 0xfe61)) || + (code == 0xfe68) || + ((code >= 0xfe6a) && (code <= 0xfe6b)) || + ((code >= 0xff01) && (code <= 0xff03)) || + ((code >= 0xff05) && (code <= 0xff07)) || + (code == 0xff0a) || + (code == 0xff0c) || + ((code >= 0xff0e) && (code <= 0xff0f)) || + ((code >= 0xff1a) && (code <= 0xff1b)) || + ((code >= 0xff1f) && (code <= 0xff20)) || + (code == 0xff3c) || + (code == 0xff61) || + (code == 0xff64)); +} + +/** + * xmlUCSIsCatPs: + * @code: UCS code point + * + * Check whether the character is part of Ps UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPs(int code) { + return((code == 0x28) || + (code == 0x5b) || + (code == 0x7b) || + (code == 0xf3a) || + (code == 0xf3c) || + (code == 0x169b) || + (code == 0x201a) || + (code == 0x201e) || + (code == 0x2045) || + (code == 0x207d) || + (code == 0x208d) || + (code == 0x2329) || + (code == 0x3008) || + (code == 0x300a) || + (code == 0x300c) || + (code == 0x300e) || + (code == 0x3010) || + (code == 0x3014) || + (code == 0x3016) || + (code == 0x3018) || + (code == 0x301a) || + (code == 0x301d) || + (code == 0xfd3e) || + (code == 0xfe35) || + (code == 0xfe37) || + (code == 0xfe39) || + (code == 0xfe3b) || + (code == 0xfe3d) || + (code == 0xfe3f) || + (code == 0xfe41) || + (code == 0xfe43) || + (code == 0xfe59) || + (code == 0xfe5b) || + (code == 0xfe5d) || + (code == 0xff08) || + (code == 0xff3b) || + (code == 0xff5b) || + (code == 0xff62)); +} + +/** + * xmlUCSIsCatS: + * @code: UCS code point + * + * Check whether the character is part of S UCS Category + 
* + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatS(int code) { + return((code == 0x24) || + (code == 0x2b) || + ((code >= 0x3c) && (code <= 0x3e)) || + (code == 0x5e) || + (code == 0x60) || + (code == 0x7c) || + (code == 0x7e) || + ((code >= 0xa2) && (code <= 0xa9)) || + (code == 0xac) || + ((code >= 0xae) && (code <= 0xb1)) || + (code == 0xb4) || + (code == 0xb6) || + (code == 0xb8) || + (code == 0xd7) || + (code == 0xf7) || + ((code >= 0x2b9) && (code <= 0x2ba)) || + ((code >= 0x2c2) && (code <= 0x2cf)) || + ((code >= 0x2d2) && (code <= 0x2df)) || + ((code >= 0x2e5) && (code <= 0x2ed)) || + ((code >= 0x374) && (code <= 0x375)) || + ((code >= 0x384) && (code <= 0x385)) || + (code == 0x482) || + (code == 0x6e9) || + ((code >= 0x6fd) && (code <= 0x6fe)) || + ((code >= 0x9f2) && (code <= 0x9f3)) || + (code == 0x9fa) || + (code == 0xb70) || + (code == 0xe3f) || + ((code >= 0xf01) && (code <= 0xf03)) || + ((code >= 0xf13) && (code <= 0xf17)) || + ((code >= 0xf1a) && (code <= 0xf1f)) || + (code == 0xf34) || + (code == 0xf36) || + (code == 0xf38) || + ((code >= 0xfbe) && (code <= 0xfc5)) || + ((code >= 0xfc7) && (code <= 0xfcc)) || + (code == 0xfcf) || + (code == 0x17db) || + (code == 0x1fbd) || + ((code >= 0x1fbf) && (code <= 0x1fc1)) || + ((code >= 0x1fcd) && (code <= 0x1fcf)) || + ((code >= 0x1fdd) && (code <= 0x1fdf)) || + ((code >= 0x1fed) && (code <= 0x1fef)) || + ((code >= 0x1ffd) && (code <= 0x1ffe)) || + (code == 0x2044) || + ((code >= 0x207a) && (code <= 0x207c)) || + ((code >= 0x208a) && (code <= 0x208c)) || + ((code >= 0x20a0) && (code <= 0x20af)) || + ((code >= 0x2100) && (code <= 0x2101)) || + ((code >= 0x2103) && (code <= 0x2106)) || + ((code >= 0x2108) && (code <= 0x2109)) || + (code == 0x2114) || + ((code >= 0x2116) && (code <= 0x2118)) || + ((code >= 0x211e) && (code <= 0x2123)) || + (code == 0x2125) || + (code == 0x2127) || + (code == 0x2129) || + (code == 0x212e) || + (code == 0x2132) || + (code == 0x213a) || + ((code >= 0x2190) && (code <= 
0x21f3)) || + ((code >= 0x2200) && (code <= 0x22f1)) || + ((code >= 0x2300) && (code <= 0x2328)) || + ((code >= 0x232b) && (code <= 0x237b)) || + ((code >= 0x237d) && (code <= 0x239a)) || + ((code >= 0x2400) && (code <= 0x2426)) || + ((code >= 0x2440) && (code <= 0x244a)) || + ((code >= 0x249c) && (code <= 0x24e9)) || + ((code >= 0x2500) && (code <= 0x2595)) || + ((code >= 0x25a0) && (code <= 0x25f7)) || + ((code >= 0x2600) && (code <= 0x2613)) || + ((code >= 0x2619) && (code <= 0x2671)) || + ((code >= 0x2701) && (code <= 0x2704)) || + ((code >= 0x2706) && (code <= 0x2709)) || + ((code >= 0x270c) && (code <= 0x2727)) || + ((code >= 0x2729) && (code <= 0x274b)) || + (code == 0x274d) || + ((code >= 0x274f) && (code <= 0x2752)) || + (code == 0x2756) || + ((code >= 0x2758) && (code <= 0x275e)) || + ((code >= 0x2761) && (code <= 0x2767)) || + (code == 0x2794) || + ((code >= 0x2798) && (code <= 0x27af)) || + ((code >= 0x27b1) && (code <= 0x27be)) || + ((code >= 0x2800) && (code <= 0x28ff)) || + ((code >= 0x2e80) && (code <= 0x2e99)) || + ((code >= 0x2e9b) && (code <= 0x2ef3)) || + ((code >= 0x2f00) && (code <= 0x2fd5)) || + ((code >= 0x2ff0) && (code <= 0x2ffb)) || + (code == 0x3004) || + ((code >= 0x3012) && (code <= 0x3013)) || + (code == 0x3020) || + ((code >= 0x3036) && (code <= 0x3037)) || + ((code >= 0x303e) && (code <= 0x303f)) || + ((code >= 0x309b) && (code <= 0x309c)) || + ((code >= 0x3190) && (code <= 0x3191)) || + ((code >= 0x3196) && (code <= 0x319f)) || + ((code >= 0x3200) && (code <= 0x321c)) || + ((code >= 0x322a) && (code <= 0x3243)) || + ((code >= 0x3260) && (code <= 0x327b)) || + (code == 0x327f) || + ((code >= 0x328a) && (code <= 0x32b0)) || + ((code >= 0x32c0) && (code <= 0x32cb)) || + ((code >= 0x32d0) && (code <= 0x32fe)) || + ((code >= 0x3300) && (code <= 0x3376)) || + ((code >= 0x337b) && (code <= 0x33dd)) || + ((code >= 0x33e0) && (code <= 0x33fe)) || + ((code >= 0xa490) && (code <= 0xa4a1)) || + ((code >= 0xa4a4) && (code <= 0xa4b3)) || + 
((code >= 0xa4b5) && (code <= 0xa4c0)) || + ((code >= 0xa4c2) && (code <= 0xa4c4)) || + (code == 0xa4c6) || + (code == 0xfb29) || + (code == 0xfe62) || + ((code >= 0xfe64) && (code <= 0xfe66)) || + (code == 0xfe69) || + (code == 0xff04) || + (code == 0xff0b) || + ((code >= 0xff1c) && (code <= 0xff1e)) || + (code == 0xff3e) || + (code == 0xff40) || + (code == 0xff5c) || + (code == 0xff5e) || + ((code >= 0xffe0) && (code <= 0xffe6)) || + ((code >= 0xffe8) && (code <= 0xffee)) || + ((code >= 0xfffc) && (code <= 0xfffd)) || + ((code >= 0x1d000) && (code <= 0x1d0f5)) || + ((code >= 0x1d100) && (code <= 0x1d126)) || + ((code >= 0x1d12a) && (code <= 0x1d164)) || + ((code >= 0x1d16a) && (code <= 0x1d16c)) || + ((code >= 0x1d183) && (code <= 0x1d184)) || + ((code >= 0x1d18c) && (code <= 0x1d1a9)) || + ((code >= 0x1d1ae) && (code <= 0x1d1dd)) || + (code == 0x1d6c1) || + (code == 0x1d6db) || + (code == 0x1d6fb) || + (code == 0x1d715) || + (code == 0x1d735) || + (code == 0x1d74f) || + (code == 0x1d76f) || + (code == 0x1d789) || + (code == 0x1d7a9) || + (code == 0x1d7c3)); +} + +/** + * xmlUCSIsCatSc: + * @code: UCS code point + * + * Check whether the character is part of Sc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSc(int code) { + return((code == 0x24) || + ((code >= 0xa2) && (code <= 0xa5)) || + ((code >= 0x9f2) && (code <= 0x9f3)) || + (code == 0xe3f) || + (code == 0x17db) || + ((code >= 0x20a0) && (code <= 0x20af)) || + (code == 0xfe69) || + (code == 0xff04) || + ((code >= 0xffe0) && (code <= 0xffe1)) || + ((code >= 0xffe5) && (code <= 0xffe6))); +} + +/** + * xmlUCSIsCatSk: + * @code: UCS code point + * + * Check whether the character is part of Sk UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSk(int code) { + return((code == 0x5e) || + (code == 0x60) || + (code == 0xa8) || + (code == 0xaf) || + (code == 0xb4) || + (code == 0xb8) || + ((code >= 0x2b9) && (code <= 0x2ba)) || + ((code >= 0x2c2) && (code <= 0x2cf)) 
|| + ((code >= 0x2d2) && (code <= 0x2df)) || + ((code >= 0x2e5) && (code <= 0x2ed)) || + ((code >= 0x374) && (code <= 0x375)) || + ((code >= 0x384) && (code <= 0x385)) || + (code == 0x1fbd) || + ((code >= 0x1fbf) && (code <= 0x1fc1)) || + ((code >= 0x1fcd) && (code <= 0x1fcf)) || + ((code >= 0x1fdd) && (code <= 0x1fdf)) || + ((code >= 0x1fed) && (code <= 0x1fef)) || + ((code >= 0x1ffd) && (code <= 0x1ffe)) || + ((code >= 0x309b) && (code <= 0x309c)) || + (code == 0xff3e) || + (code == 0xff40) || + (code == 0xffe3)); +} + +/** + * xmlUCSIsCatSm: + * @code: UCS code point + * + * Check whether the character is part of Sm UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSm(int code) { + return((code == 0x2b) || + ((code >= 0x3c) && (code <= 0x3e)) || + (code == 0x7c) || + (code == 0x7e) || + (code == 0xac) || + (code == 0xb1) || + (code == 0xd7) || + (code == 0xf7) || + (code == 0x2044) || + ((code >= 0x207a) && (code <= 0x207c)) || + ((code >= 0x208a) && (code <= 0x208c)) || + ((code >= 0x2190) && (code <= 0x2194)) || + ((code >= 0x219a) && (code <= 0x219b)) || + (code == 0x21a0) || + (code == 0x21a3) || + (code == 0x21a6) || + (code == 0x21ae) || + ((code >= 0x21ce) && (code <= 0x21cf)) || + (code == 0x21d2) || + (code == 0x21d4) || + ((code >= 0x2200) && (code <= 0x22f1)) || + ((code >= 0x2308) && (code <= 0x230b)) || + ((code >= 0x2320) && (code <= 0x2321)) || + (code == 0x25b7) || + (code == 0x25c1) || + (code == 0x266f) || + (code == 0xfb29) || + (code == 0xfe62) || + ((code >= 0xfe64) && (code <= 0xfe66)) || + (code == 0xff0b) || + ((code >= 0xff1c) && (code <= 0xff1e)) || + (code == 0xff5c) || + (code == 0xff5e) || + (code == 0xffe2) || + ((code >= 0xffe9) && (code <= 0xffec)) || + (code == 0x1d6c1) || + (code == 0x1d6db) || + (code == 0x1d6fb) || + (code == 0x1d715) || + (code == 0x1d735) || + (code == 0x1d74f) || + (code == 0x1d76f) || + (code == 0x1d789) || + (code == 0x1d7a9) || + (code == 0x1d7c3)); +} + +/** + * xmlUCSIsCatSo: + * 
@code: UCS code point + * + * Check whether the character is part of So UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSo(int code) { + return(((code >= 0xa6) && (code <= 0xa7)) || + (code == 0xa9) || + (code == 0xae) || + (code == 0xb0) || + (code == 0xb6) || + (code == 0x482) || + (code == 0x6e9) || + ((code >= 0x6fd) && (code <= 0x6fe)) || + (code == 0x9fa) || + (code == 0xb70) || + ((code >= 0xf01) && (code <= 0xf03)) || + ((code >= 0xf13) && (code <= 0xf17)) || + ((code >= 0xf1a) && (code <= 0xf1f)) || + (code == 0xf34) || + (code == 0xf36) || + (code == 0xf38) || + ((code >= 0xfbe) && (code <= 0xfc5)) || + ((code >= 0xfc7) && (code <= 0xfcc)) || + (code == 0xfcf) || + ((code >= 0x2100) && (code <= 0x2101)) || + ((code >= 0x2103) && (code <= 0x2106)) || + ((code >= 0x2108) && (code <= 0x2109)) || + (code == 0x2114) || + ((code >= 0x2116) && (code <= 0x2118)) || + ((code >= 0x211e) && (code <= 0x2123)) || + (code == 0x2125) || + (code == 0x2127) || + (code == 0x2129) || + (code == 0x212e) || + (code == 0x2132) || + (code == 0x213a) || + ((code >= 0x2195) && (code <= 0x2199)) || + ((code >= 0x219c) && (code <= 0x219f)) || + ((code >= 0x21a1) && (code <= 0x21a2)) || + ((code >= 0x21a4) && (code <= 0x21a5)) || + ((code >= 0x21a7) && (code <= 0x21ad)) || + ((code >= 0x21af) && (code <= 0x21cd)) || + ((code >= 0x21d0) && (code <= 0x21d1)) || + (code == 0x21d3) || + ((code >= 0x21d5) && (code <= 0x21f3)) || + ((code >= 0x2300) && (code <= 0x2307)) || + ((code >= 0x230c) && (code <= 0x231f)) || + ((code >= 0x2322) && (code <= 0x2328)) || + ((code >= 0x232b) && (code <= 0x237b)) || + ((code >= 0x237d) && (code <= 0x239a)) || + ((code >= 0x2400) && (code <= 0x2426)) || + ((code >= 0x2440) && (code <= 0x244a)) || + ((code >= 0x249c) && (code <= 0x24e9)) || + ((code >= 0x2500) && (code <= 0x2595)) || + ((code >= 0x25a0) && (code <= 0x25b6)) || + ((code >= 0x25b8) && (code <= 0x25c0)) || + ((code >= 0x25c2) && (code <= 0x25f7)) || + ((code >= 
0x2600) && (code <= 0x2613)) || + ((code >= 0x2619) && (code <= 0x266e)) || + ((code >= 0x2670) && (code <= 0x2671)) || + ((code >= 0x2701) && (code <= 0x2704)) || + ((code >= 0x2706) && (code <= 0x2709)) || + ((code >= 0x270c) && (code <= 0x2727)) || + ((code >= 0x2729) && (code <= 0x274b)) || + (code == 0x274d) || + ((code >= 0x274f) && (code <= 0x2752)) || + (code == 0x2756) || + ((code >= 0x2758) && (code <= 0x275e)) || + ((code >= 0x2761) && (code <= 0x2767)) || + (code == 0x2794) || + ((code >= 0x2798) && (code <= 0x27af)) || + ((code >= 0x27b1) && (code <= 0x27be)) || + ((code >= 0x2800) && (code <= 0x28ff)) || + ((code >= 0x2e80) && (code <= 0x2e99)) || + ((code >= 0x2e9b) && (code <= 0x2ef3)) || + ((code >= 0x2f00) && (code <= 0x2fd5)) || + ((code >= 0x2ff0) && (code <= 0x2ffb)) || + (code == 0x3004) || + ((code >= 0x3012) && (code <= 0x3013)) || + (code == 0x3020) || + ((code >= 0x3036) && (code <= 0x3037)) || + ((code >= 0x303e) && (code <= 0x303f)) || + ((code >= 0x3190) && (code <= 0x3191)) || + ((code >= 0x3196) && (code <= 0x319f)) || + ((code >= 0x3200) && (code <= 0x321c)) || + ((code >= 0x322a) && (code <= 0x3243)) || + ((code >= 0x3260) && (code <= 0x327b)) || + (code == 0x327f) || + ((code >= 0x328a) && (code <= 0x32b0)) || + ((code >= 0x32c0) && (code <= 0x32cb)) || + ((code >= 0x32d0) && (code <= 0x32fe)) || + ((code >= 0x3300) && (code <= 0x3376)) || + ((code >= 0x337b) && (code <= 0x33dd)) || + ((code >= 0x33e0) && (code <= 0x33fe)) || + ((code >= 0xa490) && (code <= 0xa4a1)) || + ((code >= 0xa4a4) && (code <= 0xa4b3)) || + ((code >= 0xa4b5) && (code <= 0xa4c0)) || + ((code >= 0xa4c2) && (code <= 0xa4c4)) || + (code == 0xa4c6) || + (code == 0xffe4) || + (code == 0xffe8) || + ((code >= 0xffed) && (code <= 0xffee)) || + ((code >= 0xfffc) && (code <= 0xfffd)) || + ((code >= 0x1d000) && (code <= 0x1d0f5)) || + ((code >= 0x1d100) && (code <= 0x1d126)) || + ((code >= 0x1d12a) && (code <= 0x1d164)) || + ((code >= 0x1d16a) && (code <= 0x1d16c)) || + 
((code >= 0x1d183) && (code <= 0x1d184)) || + ((code >= 0x1d18c) && (code <= 0x1d1a9)) || + ((code >= 0x1d1ae) && (code <= 0x1d1dd))); +} + +/** + * xmlUCSIsCatZ: + * @code: UCS code point + * + * Check whether the character is part of Z UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZ(int code) { + return((code == 0x20) || + (code == 0xa0) || + (code == 0x1680) || + ((code >= 0x2000) && (code <= 0x200b)) || + ((code >= 0x2028) && (code <= 0x2029)) || + (code == 0x202f) || + (code == 0x3000)); +} + +/** + * xmlUCSIsCatZl: + * @code: UCS code point + * + * Check whether the character is part of Zl UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZl(int code) { + return((code == 0x2028)); +} + +/** + * xmlUCSIsCatZp: + * @code: UCS code point + * + * Check whether the character is part of Zp UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZp(int code) { + return((code == 0x2029)); +} + +/** + * xmlUCSIsCatZs: + * @code: UCS code point + * + * Check whether the character is part of Zs UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZs(int code) { + return((code == 0x20) || + (code == 0xa0) || + (code == 0x1680) || + ((code >= 0x2000) && (code <= 0x200b)) || + (code == 0x202f) || + (code == 0x3000)); +} + +/** + * xmlUCSIsCat: + * @code: UCS code point + * @cat: UCS Category name + * + * Check whether the caracter is part of the UCS Category + * + * Returns 1 if true, 0 if false and -1 on unknown category + */ +int +xmlUCSIsCat(int code, const char *cat) { + if (!strcmp(cat, "C")) + return(xmlUCSIsCatC(code)); + if (!strcmp(cat, "Cc")) + return(xmlUCSIsCatCc(code)); + if (!strcmp(cat, "Cf")) + return(xmlUCSIsCatCf(code)); + if (!strcmp(cat, "Co")) + return(xmlUCSIsCatCo(code)); + if (!strcmp(cat, "Cs")) + return(xmlUCSIsCatCs(code)); + if (!strcmp(cat, "L")) + return(xmlUCSIsCatL(code)); + if (!strcmp(cat, "Ll")) + return(xmlUCSIsCatLl(code)); + if (!strcmp(cat, "Lm")) + 
return(xmlUCSIsCatLm(code)); + if (!strcmp(cat, "Lo")) + return(xmlUCSIsCatLo(code)); + if (!strcmp(cat, "Lt")) + return(xmlUCSIsCatLt(code)); + if (!strcmp(cat, "Lu")) + return(xmlUCSIsCatLu(code)); + if (!strcmp(cat, "M")) + return(xmlUCSIsCatM(code)); + if (!strcmp(cat, "Mc")) + return(xmlUCSIsCatMc(code)); + if (!strcmp(cat, "Me")) + return(xmlUCSIsCatMe(code)); + if (!strcmp(cat, "Mn")) + return(xmlUCSIsCatMn(code)); + if (!strcmp(cat, "N")) + return(xmlUCSIsCatN(code)); + if (!strcmp(cat, "Nd")) + return(xmlUCSIsCatNd(code)); + if (!strcmp(cat, "Nl")) + return(xmlUCSIsCatNl(code)); + if (!strcmp(cat, "No")) + return(xmlUCSIsCatNo(code)); + if (!strcmp(cat, "P")) + return(xmlUCSIsCatP(code)); + if (!strcmp(cat, "Pc")) + return(xmlUCSIsCatPc(code)); + if (!strcmp(cat, "Pd")) + return(xmlUCSIsCatPd(code)); + if (!strcmp(cat, "Pe")) + return(xmlUCSIsCatPe(code)); + if (!strcmp(cat, "Pf")) + return(xmlUCSIsCatPf(code)); + if (!strcmp(cat, "Pi")) + return(xmlUCSIsCatPi(code)); + if (!strcmp(cat, "Po")) + return(xmlUCSIsCatPo(code)); + if (!strcmp(cat, "Ps")) + return(xmlUCSIsCatPs(code)); + if (!strcmp(cat, "S")) + return(xmlUCSIsCatS(code)); + if (!strcmp(cat, "Sc")) + return(xmlUCSIsCatSc(code)); + if (!strcmp(cat, "Sk")) + return(xmlUCSIsCatSk(code)); + if (!strcmp(cat, "Sm")) + return(xmlUCSIsCatSm(code)); + if (!strcmp(cat, "So")) + return(xmlUCSIsCatSo(code)); + if (!strcmp(cat, "Z")) + return(xmlUCSIsCatZ(code)); + if (!strcmp(cat, "Zl")) + return(xmlUCSIsCatZl(code)); + if (!strcmp(cat, "Zp")) + return(xmlUCSIsCatZp(code)); + if (!strcmp(cat, "Zs")) + return(xmlUCSIsCatZs(code)); + return(-1); +} + + +#endif /* LIBXML_UNICODE_ENABLED */ diff --git a/bundle/libxml/xpath.c b/bundle/libxml/xpath.c new file mode 100644 index 0000000000..305bccf335 --- /dev/null +++ b/bundle/libxml/xpath.c @@ -0,0 +1,10990 @@ +/* + * xpath.c: XML Path Language implementation + * XPath is a language for addressing parts of an XML document, + * designed to be used by both XSLT and 
XPointer + * + * Reference: W3C Recommendation 16 November 1999 + * http://www.w3.org/TR/1999/REC-xpath-19991116 + * Public reference: + * http://www.w3.org/TR/xpath + * + * See Copyright for the status of this software + * + * Author: daniel@veillard.com + * + */ + +#define IN_LIBXML +#include "libxml.h" +#ifdef LIBXML_XPATH_ENABLED + +#include <string.h> + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_MATH_H +#include <math.h> +#endif +#ifdef HAVE_FLOAT_H +#include <float.h> +#endif +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#ifdef HAVE_SIGNAL_H +#include <signal.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/tree.h> +#include <libxml/valid.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include <libxml/parserInternals.h> +#include <libxml/hash.h> +#ifdef LIBXML_XPTR_ENABLED +#include <libxml/xpointer.h> +#endif +#ifdef LIBXML_DEBUG_ENABLED +#include <libxml/debugXML.h> +#endif +#include <libxml/xmlerror.h> +#include <libxml/threads.h> +#include <libxml/globals.h> + +/* #define DEBUG */ +/* #define DEBUG_STEP */ +/* #define DEBUG_STEP_NTH */ +/* #define DEBUG_EXPR */ +/* #define DEBUG_EVAL_COUNTS */ + +static xmlNs xmlXPathXMLNamespaceStruct = { + NULL, + XML_NAMESPACE_DECL, + XML_XML_NAMESPACE, + BAD_CAST "xml", + NULL +}; +static xmlNsPtr xmlXPathXMLNamespace = &xmlXPathXMLNamespaceStruct; +#ifndef LIBXML_THREAD_ENABLED +/* + * Optimizer is disabled only when threaded apps are detected while + * the library ain't compiled for thread safety. + */ +static int xmlXPathDisableOptimizer = 0; +#endif + +/************************************************************************ + * * + * Floating point stuff * + * * + ************************************************************************/ + +#ifndef TRIO_REPLACE_STDIO +#define TRIO_PUBLIC static +#endif +#include "trionan.c" + +/* + * The lack of portability of this section of the libc is annoying ! 
+ */ +double xmlXPathNAN = 0; +double xmlXPathPINF = 1; +double xmlXPathNINF = -1; +double xmlXPathNZERO = 0; +static int xmlXPathInitialized = 0; + +/** + * xmlXPathInit: + * + * Initialize the XPath environment + */ +void +xmlXPathInit(void) { + if (xmlXPathInitialized) return; + + xmlXPathPINF = trio_pinf(); + xmlXPathNINF = trio_ninf(); + xmlXPathNAN = trio_nan(); + xmlXPathNZERO = trio_nzero(); + + xmlXPathInitialized = 1; +} + +/** + * xmlXPathIsNaN: + * @val: a double value + * + * Provides a portable isnan() function to detect whether a double + * is a NotaNumber. Based on trio code + * http://sourceforge.net/projects/ctrio/ + * + * Returns 1 if the value is a NaN, 0 otherwise + */ +int +xmlXPathIsNaN(double val) { + return(trio_isnan(val)); +} + +/** + * xmlXPathIsInf: + * @val: a double value + * + * Provides a portable isinf() function to detect whether a double + * is a +Infinite or -Infinite. Based on trio code + * http://sourceforge.net/projects/ctrio/ + * + * Returns 1 vi the value is +Infinite, -1 if -Infinite, 0 otherwise + */ +int +xmlXPathIsInf(double val) { + return(trio_isinf(val)); +} + +/** + * xmlXPathGetSign: + * @val: a double value + * + * Provides a portable function to detect the sign of a double + * Modified from trio code + * http://sourceforge.net/projects/ctrio/ + * + * Returns 1 if the value is Negative, 0 if positive + */ +static int +xmlXPathGetSign(double val) { + return(trio_signbit(val)); +} + + +/************************************************************************ + * * + * Parser Types * + * * + ************************************************************************/ + +/* + * Types are private: + */ + +typedef enum { + XPATH_OP_END=0, + XPATH_OP_AND, + XPATH_OP_OR, + XPATH_OP_EQUAL, + XPATH_OP_CMP, + XPATH_OP_PLUS, + XPATH_OP_MULT, + XPATH_OP_UNION, + XPATH_OP_ROOT, + XPATH_OP_NODE, + XPATH_OP_RESET, + XPATH_OP_COLLECT, + XPATH_OP_VALUE, + XPATH_OP_VARIABLE, + XPATH_OP_FUNCTION, + XPATH_OP_ARG, + XPATH_OP_PREDICATE, + 
XPATH_OP_FILTER, + XPATH_OP_SORT +#ifdef LIBXML_XPTR_ENABLED + ,XPATH_OP_RANGETO +#endif +} xmlXPathOp; + +typedef enum { + AXIS_ANCESTOR = 1, + AXIS_ANCESTOR_OR_SELF, + AXIS_ATTRIBUTE, + AXIS_CHILD, + AXIS_DESCENDANT, + AXIS_DESCENDANT_OR_SELF, + AXIS_FOLLOWING, + AXIS_FOLLOWING_SIBLING, + AXIS_NAMESPACE, + AXIS_PARENT, + AXIS_PRECEDING, + AXIS_PRECEDING_SIBLING, + AXIS_SELF +} xmlXPathAxisVal; + +typedef enum { + NODE_TEST_NONE = 0, + NODE_TEST_TYPE = 1, + NODE_TEST_PI = 2, + NODE_TEST_ALL = 3, + NODE_TEST_NS = 4, + NODE_TEST_NAME = 5 +} xmlXPathTestVal; + +typedef enum { + NODE_TYPE_NODE = 0, + NODE_TYPE_COMMENT = XML_COMMENT_NODE, + NODE_TYPE_TEXT = XML_TEXT_NODE, + NODE_TYPE_PI = XML_PI_NODE +} xmlXPathTypeVal; + + +typedef struct _xmlXPathStepOp xmlXPathStepOp; +typedef xmlXPathStepOp *xmlXPathStepOpPtr; +struct _xmlXPathStepOp { + xmlXPathOp op; + int ch1; + int ch2; + int value; + int value2; + int value3; + void *value4; + void *value5; + void *cache; + void *cacheURI; +}; + +struct _xmlXPathCompExpr { + int nbStep; + int maxStep; + xmlXPathStepOp *steps; /* ops for computation */ + int last; + xmlChar *expr; +#ifdef DEBUG_EVAL_COUNTS + int nb; + xmlChar *string; +#endif +}; + +/************************************************************************ + * * + * Parser Type functions * + * * + ************************************************************************/ + +/** + * xmlXPathNewCompExpr: + * + * Create a new Xpath component + * + * Returns the newly allocated xmlXPathCompExprPtr or NULL in case of error + */ +static xmlXPathCompExprPtr +xmlXPathNewCompExpr(void) { + xmlXPathCompExprPtr cur; + + cur = (xmlXPathCompExprPtr) xmlMalloc(sizeof(xmlXPathCompExpr)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewCompExpr : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlXPathCompExpr)); + cur->maxStep = 10; + cur->nbStep = 0; + cur->steps = (xmlXPathStepOp *) xmlMalloc(cur->maxStep * + 
sizeof(xmlXPathStepOp)); + if (cur->steps == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewCompExpr : malloc failed\n"); + xmlFree(cur); + return(NULL); + } + memset(cur->steps, 0, cur->maxStep * sizeof(xmlXPathStepOp)); + cur->last = -1; +#ifdef DEBUG_EVAL_COUNTS + cur->nb = 0; +#endif + return(cur); +} + +/** + * xmlXPathFreeCompExpr: + * @comp: an XPATH comp + * + * Free up the memory allocated by @comp + */ +void +xmlXPathFreeCompExpr(xmlXPathCompExprPtr comp) +{ + xmlXPathStepOpPtr op; + int i; + + if (comp == NULL) + return; + for (i = 0; i < comp->nbStep; i++) { + op = &comp->steps[i]; + if (op->value4 != NULL) { + if (op->op == XPATH_OP_VALUE) + xmlXPathFreeObject(op->value4); + else + xmlFree(op->value4); + } + if (op->value5 != NULL) + xmlFree(op->value5); + } + if (comp->steps != NULL) { + xmlFree(comp->steps); + } +#ifdef DEBUG_EVAL_COUNTS + if (comp->string != NULL) { + xmlFree(comp->string); + } +#endif + if (comp->expr != NULL) { + xmlFree(comp->expr); + } + + xmlFree(comp); +} + +/** + * xmlXPathCompExprAdd: + * @comp: the compiled expression + * @ch1: first child index + * @ch2: second child index + * @op: an op + * @value: the first int value + * @value2: the second int value + * @value3: the third int value + * @value4: the first string value + * @value5: the second string value + * + * Add an step to an XPath Compiled Expression + * + * Returns -1 in case of failure, the index otherwise + */ +static int +xmlXPathCompExprAdd(xmlXPathCompExprPtr comp, int ch1, int ch2, + xmlXPathOp op, int value, + int value2, int value3, void *value4, void *value5) { + if (comp->nbStep >= comp->maxStep) { + xmlXPathStepOp *real; + + comp->maxStep *= 2; + real = (xmlXPathStepOp *) xmlRealloc(comp->steps, + comp->maxStep * sizeof(xmlXPathStepOp)); + if (real == NULL) { + comp->maxStep /= 2; + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompExprAdd : realloc failed\n"); + return(-1); + } + comp->steps = real; + } + comp->last = comp->nbStep; 
+ comp->steps[comp->nbStep].ch1 = ch1; + comp->steps[comp->nbStep].ch2 = ch2; + comp->steps[comp->nbStep].op = op; + comp->steps[comp->nbStep].value = value; + comp->steps[comp->nbStep].value2 = value2; + comp->steps[comp->nbStep].value3 = value3; + comp->steps[comp->nbStep].value4 = value4; + comp->steps[comp->nbStep].value5 = value5; + comp->steps[comp->nbStep].cache = NULL; + return(comp->nbStep++); +} + +/** + * xmlXPathCompSwap: + * @comp: the compiled expression + * @op: operation index + * + * Swaps 2 operations in the compiled expression + */ +static void +xmlXPathCompSwap(xmlXPathStepOpPtr op) { + int tmp; + +#ifndef LIBXML_THREAD_ENABLED + /* + * Since this manipulates possibly shared variables, this is + * disable if one detects that the library is used in a multithreaded + * application + */ + if (xmlXPathDisableOptimizer) + return; +#endif + + tmp = op->ch1; + op->ch1 = op->ch2; + op->ch2 = tmp; +} + +#define PUSH_FULL_EXPR(op, op1, op2, val, val2, val3, val4, val5) \ + xmlXPathCompExprAdd(ctxt->comp, (op1), (op2), \ + (op), (val), (val2), (val3), (val4), (val5)) +#define PUSH_LONG_EXPR(op, val, val2, val3, val4, val5) \ + xmlXPathCompExprAdd(ctxt->comp, ctxt->comp->last, -1, \ + (op), (val), (val2), (val3), (val4), (val5)) + +#define PUSH_LEAVE_EXPR(op, val, val2) \ +xmlXPathCompExprAdd(ctxt->comp, -1, -1, (op), (val), (val2), 0 ,NULL ,NULL) + +#define PUSH_UNARY_EXPR(op, ch, val, val2) \ +xmlXPathCompExprAdd(ctxt->comp, (ch), -1, (op), (val), (val2), 0 ,NULL ,NULL) + +#define PUSH_BINARY_EXPR(op, ch1, ch2, val, val2) \ +xmlXPathCompExprAdd(ctxt->comp, (ch1), (ch2), (op), (val), (val2), 0 ,NULL ,NULL) + +/************************************************************************ + * * + * Debugging related functions * + * * + ************************************************************************/ + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define STRANGE \ + 
xmlGenericError(xmlGenericErrorContext, \ + "Internal error at %s:%d\n", \ + __FILE__, __LINE__); + +#ifdef LIBXML_DEBUG_ENABLED +static void +xmlXPathDebugDumpNode(FILE *output, xmlNodePtr cur, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + if (cur == NULL) { + fprintf(output, shift); + fprintf(output, "Node is NULL !\n"); + return; + + } + + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + fprintf(output, shift); + fprintf(output, " /\n"); + } else if (cur->type == XML_ATTRIBUTE_NODE) + xmlDebugDumpAttr(output, (xmlAttrPtr)cur, depth); + else + xmlDebugDumpOneNode(output, cur, depth); +} +static void +xmlXPathDebugDumpNodeList(FILE *output, xmlNodePtr cur, int depth) { + xmlNodePtr tmp; + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + if (cur == NULL) { + fprintf(output, shift); + fprintf(output, "Node is NULL !\n"); + return; + + } + + while (cur != NULL) { + tmp = cur; + cur = cur->next; + xmlDebugDumpOneNode(output, tmp, depth); + } +} + +static void +xmlXPathDebugDumpNodeSet(FILE *output, xmlNodeSetPtr cur, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + if (cur == NULL) { + fprintf(output, shift); + fprintf(output, "NodeSet is NULL !\n"); + return; + + } + + if (cur != NULL) { + fprintf(output, "Set contains %d nodes:\n", cur->nodeNr); + for (i = 0;i < cur->nodeNr;i++) { + fprintf(output, shift); + fprintf(output, "%d", i + 1); + xmlXPathDebugDumpNode(output, cur->nodeTab[i], depth + 1); + } + } +} + +static void +xmlXPathDebugDumpValueTree(FILE *output, xmlNodeSetPtr cur, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = 
shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + if ((cur == NULL) || (cur->nodeNr == 0) || (cur->nodeTab[0] == NULL)) { + fprintf(output, shift); + fprintf(output, "Value Tree is NULL !\n"); + return; + + } + + fprintf(output, shift); + fprintf(output, "%d", i + 1); + xmlXPathDebugDumpNodeList(output, cur->nodeTab[0]->children, depth + 1); +} +#if defined(LIBXML_XPTR_ENABLED) +static void +xmlXPathDebugDumpLocationSet(FILE *output, xmlLocationSetPtr cur, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + if (cur == NULL) { + fprintf(output, shift); + fprintf(output, "LocationSet is NULL !\n"); + return; + + } + + for (i = 0;i < cur->locNr;i++) { + fprintf(output, shift); + fprintf(output, "%d : ", i + 1); + xmlXPathDebugDumpObject(output, cur->locTab[i], depth + 1); + } +} +#endif /* LIBXML_XPTR_ENABLED */ + +/** + * xmlXPathDebugDumpObject: + * @output: the FILE * to dump the output + * @cur: the object to inspect + * @depth: indentation level + * + * Dump the content of the object for debugging purposes + */ +void +xmlXPathDebugDumpObject(FILE *output, xmlXPathObjectPtr cur, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (cur == NULL) { + fprintf(output, "Object is empty (NULL)\n"); + return; + } + switch(cur->type) { + case XPATH_UNDEFINED: + fprintf(output, "Object is uninitialized\n"); + break; + case XPATH_NODESET: + fprintf(output, "Object is a Node Set :\n"); + xmlXPathDebugDumpNodeSet(output, cur->nodesetval, depth); + break; + case XPATH_XSLT_TREE: + fprintf(output, "Object is an XSLT value tree :\n"); + xmlXPathDebugDumpValueTree(output, cur->nodesetval, depth); + break; + case XPATH_BOOLEAN: + fprintf(output, "Object is a Boolean : "); + if (cur->boolval) 
fprintf(output, "true\n"); + else fprintf(output, "false\n"); + break; + case XPATH_NUMBER: + switch (xmlXPathIsInf(cur->floatval)) { + case 1: + fprintf(output, "Object is a number : Infinity\n"); + break; + case -1: + fprintf(output, "Object is a number : -Infinity\n"); + break; + default: + if (xmlXPathIsNaN(cur->floatval)) { + fprintf(output, "Object is a number : NaN\n"); + } else if (cur->floatval == 0 && xmlXPathGetSign(cur->floatval) != 0) { + fprintf(output, "Object is a number : 0\n"); + } else { + fprintf(output, "Object is a number : %0g\n", cur->floatval); + } + } + break; + case XPATH_STRING: + fprintf(output, "Object is a string : "); + xmlDebugDumpString(output, cur->stringval); + fprintf(output, "\n"); + break; + case XPATH_POINT: + fprintf(output, "Object is a point : index %d in node", cur->index); + xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user, depth + 1); + fprintf(output, "\n"); + break; + case XPATH_RANGE: + if ((cur->user2 == NULL) || + ((cur->user2 == cur->user) && (cur->index == cur->index2))) { + fprintf(output, "Object is a collapsed range :\n"); + fprintf(output, shift); + if (cur->index >= 0) + fprintf(output, "index %d in ", cur->index); + fprintf(output, "node\n"); + xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user, + depth + 1); + } else { + fprintf(output, "Object is a range :\n"); + fprintf(output, shift); + fprintf(output, "From "); + if (cur->index >= 0) + fprintf(output, "index %d in ", cur->index); + fprintf(output, "node\n"); + xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user, + depth + 1); + fprintf(output, shift); + fprintf(output, "To "); + if (cur->index2 >= 0) + fprintf(output, "index %d in ", cur->index2); + fprintf(output, "node\n"); + xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user2, + depth + 1); + fprintf(output, "\n"); + } + break; + case XPATH_LOCATIONSET: +#if defined(LIBXML_XPTR_ENABLED) + fprintf(output, "Object is a Location Set:\n"); + xmlXPathDebugDumpLocationSet(output, + 
(xmlLocationSetPtr) cur->user, depth); +#endif + break; + case XPATH_USERS: + fprintf(output, "Object is user defined\n"); + break; + } +} + +static void +xmlXPathDebugDumpStepOp(FILE *output, xmlXPathCompExprPtr comp, + xmlXPathStepOpPtr op, int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + if (op == NULL) { + fprintf(output, "Step is NULL\n"); + return; + } + switch (op->op) { + case XPATH_OP_END: + fprintf(output, "END"); break; + case XPATH_OP_AND: + fprintf(output, "AND"); break; + case XPATH_OP_OR: + fprintf(output, "OR"); break; + case XPATH_OP_EQUAL: + if (op->value) + fprintf(output, "EQUAL ="); + else + fprintf(output, "EQUAL !="); + break; + case XPATH_OP_CMP: + if (op->value) + fprintf(output, "CMP <"); + else + fprintf(output, "CMP >"); + if (!op->value2) + fprintf(output, "="); + break; + case XPATH_OP_PLUS: + if (op->value == 0) + fprintf(output, "PLUS -"); + else if (op->value == 1) + fprintf(output, "PLUS +"); + else if (op->value == 2) + fprintf(output, "PLUS unary -"); + else if (op->value == 3) + fprintf(output, "PLUS unary - -"); + break; + case XPATH_OP_MULT: + if (op->value == 0) + fprintf(output, "MULT *"); + else if (op->value == 1) + fprintf(output, "MULT div"); + else + fprintf(output, "MULT mod"); + break; + case XPATH_OP_UNION: + fprintf(output, "UNION"); break; + case XPATH_OP_ROOT: + fprintf(output, "ROOT"); break; + case XPATH_OP_NODE: + fprintf(output, "NODE"); break; + case XPATH_OP_RESET: + fprintf(output, "RESET"); break; + case XPATH_OP_SORT: + fprintf(output, "SORT"); break; + case XPATH_OP_COLLECT: { + xmlXPathAxisVal axis = op->value; + xmlXPathTestVal test = op->value2; + xmlXPathTypeVal type = op->value3; + const xmlChar *prefix = op->value4; + const xmlChar *name = op->value5; + + fprintf(output, "COLLECT "); + switch (axis) { + case AXIS_ANCESTOR: + fprintf(output, " 
'ancestors' "); break; + case AXIS_ANCESTOR_OR_SELF: + fprintf(output, " 'ancestors-or-self' "); break; + case AXIS_ATTRIBUTE: + fprintf(output, " 'attributes' "); break; + case AXIS_CHILD: + fprintf(output, " 'child' "); break; + case AXIS_DESCENDANT: + fprintf(output, " 'descendant' "); break; + case AXIS_DESCENDANT_OR_SELF: + fprintf(output, " 'descendant-or-self' "); break; + case AXIS_FOLLOWING: + fprintf(output, " 'following' "); break; + case AXIS_FOLLOWING_SIBLING: + fprintf(output, " 'following-siblings' "); break; + case AXIS_NAMESPACE: + fprintf(output, " 'namespace' "); break; + case AXIS_PARENT: + fprintf(output, " 'parent' "); break; + case AXIS_PRECEDING: + fprintf(output, " 'preceding' "); break; + case AXIS_PRECEDING_SIBLING: + fprintf(output, " 'preceding-sibling' "); break; + case AXIS_SELF: + fprintf(output, " 'self' "); break; + } + switch (test) { + case NODE_TEST_NONE: + fprintf(output, "'none' "); break; + case NODE_TEST_TYPE: + fprintf(output, "'type' "); break; + case NODE_TEST_PI: + fprintf(output, "'PI' "); break; + case NODE_TEST_ALL: + fprintf(output, "'all' "); break; + case NODE_TEST_NS: + fprintf(output, "'namespace' "); break; + case NODE_TEST_NAME: + fprintf(output, "'name' "); break; + } + switch (type) { + case NODE_TYPE_NODE: + fprintf(output, "'node' "); break; + case NODE_TYPE_COMMENT: + fprintf(output, "'comment' "); break; + case NODE_TYPE_TEXT: + fprintf(output, "'text' "); break; + case NODE_TYPE_PI: + fprintf(output, "'PI' "); break; + } + if (prefix != NULL) + fprintf(output, "%s:", prefix); + if (name != NULL) + fprintf(output, "%s", name); + break; + + } + case XPATH_OP_VALUE: { + xmlXPathObjectPtr object = (xmlXPathObjectPtr) op->value4; + + fprintf(output, "ELEM "); + xmlXPathDebugDumpObject(output, object, 0); + goto finish; + } + case XPATH_OP_VARIABLE: { + const xmlChar *prefix = op->value5; + const xmlChar *name = op->value4; + + if (prefix != NULL) + fprintf(output, "VARIABLE %s:%s", prefix, name); + else + 
fprintf(output, "VARIABLE %s", name); + break; + } + case XPATH_OP_FUNCTION: { + int nbargs = op->value; + const xmlChar *prefix = op->value5; + const xmlChar *name = op->value4; + + if (prefix != NULL) + fprintf(output, "FUNCTION %s:%s(%d args)", + prefix, name, nbargs); + else + fprintf(output, "FUNCTION %s(%d args)", name, nbargs); + break; + } + case XPATH_OP_ARG: fprintf(output, "ARG"); break; + case XPATH_OP_PREDICATE: fprintf(output, "PREDICATE"); break; + case XPATH_OP_FILTER: fprintf(output, "FILTER"); break; +#ifdef LIBXML_XPTR_ENABLED + case XPATH_OP_RANGETO: fprintf(output, "RANGETO"); break; +#endif + default: + fprintf(output, "UNKNOWN %d\n", op->op); return; + } + fprintf(output, "\n"); +finish: + if (op->ch1 >= 0) + xmlXPathDebugDumpStepOp(output, comp, &comp->steps[op->ch1], depth + 1); + if (op->ch2 >= 0) + xmlXPathDebugDumpStepOp(output, comp, &comp->steps[op->ch2], depth + 1); +} + +/** + * xmlXPathDebugDumpCompExpr: + * @output: the FILE * for the output + * @comp: the precompiled XPath expression + * @depth: the indentation level. + * + * Dumps the tree of the compiled XPath expression. 
+ */ +void +xmlXPathDebugDumpCompExpr(FILE *output, xmlXPathCompExprPtr comp, + int depth) { + int i; + char shift[100]; + + for (i = 0;((i < depth) && (i < 25));i++) + shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + fprintf(output, shift); + + if (comp == NULL) { + fprintf(output, "Compiled Expression is NULL\n"); + return; + } + fprintf(output, "Compiled Expression : %d elements\n", + comp->nbStep); + i = comp->last; + xmlXPathDebugDumpStepOp(output, comp, &comp->steps[i], depth + 1); +} +#endif /* LIBXML_DEBUG_ENABLED */ + +/************************************************************************ + * * + * Parser stacks related functions and macros * + * * + ************************************************************************/ + +/** + * valuePop: + * @ctxt: an XPath evaluation context + * + * Pops the top XPath object from the value stack + * + * Returns the XPath object just removed + */ +extern xmlXPathObjectPtr +valuePop(xmlXPathParserContextPtr ctxt) +{ + xmlXPathObjectPtr ret; + + if (ctxt->valueNr <= 0) + return (0); + ctxt->valueNr--; + if (ctxt->valueNr > 0) + ctxt->value = ctxt->valueTab[ctxt->valueNr - 1]; + else + ctxt->value = NULL; + ret = ctxt->valueTab[ctxt->valueNr]; + ctxt->valueTab[ctxt->valueNr] = 0; + return (ret); +} +/** + * valuePush: + * @ctxt: an XPath evaluation context + * @value: the XPath object + * + * Pushes a new XPath object on top of the value stack + * + * returns the number of items on the value stack + */ +extern int +valuePush(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr value) +{ + if (ctxt->valueNr >= ctxt->valueMax) { + ctxt->valueMax *= 2; + ctxt->valueTab = + (xmlXPathObjectPtr *) xmlRealloc(ctxt->valueTab, + ctxt->valueMax * + sizeof(ctxt->valueTab[0])); + if (ctxt->valueTab == NULL) { + xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); + return (0); + } + } + ctxt->valueTab[ctxt->valueNr] = value; + ctxt->value = value; + return (ctxt->valueNr++); +} + +/** + * 
xmlXPathPopBoolean: + * @ctxt: an XPath parser context + * + * Pops a boolean from the stack, handling conversion if needed. + * Check error with #xmlXPathCheckError. + * + * Returns the boolean + */ +int +xmlXPathPopBoolean (xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr obj; + int ret; + + obj = valuePop(ctxt); + if (obj == NULL) { + xmlXPathSetError(ctxt, XPATH_INVALID_OPERAND); + return(0); + } + ret = xmlXPathCastToBoolean(obj); + xmlXPathFreeObject(obj); + return(ret); +} + +/** + * xmlXPathPopNumber: + * @ctxt: an XPath parser context + * + * Pops a number from the stack, handling conversion if needed. + * Check error with #xmlXPathCheckError. + * + * Returns the number + */ +double +xmlXPathPopNumber (xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr obj; + double ret; + + obj = valuePop(ctxt); + if (obj == NULL) { + xmlXPathSetError(ctxt, XPATH_INVALID_OPERAND); + return(0); + } + ret = xmlXPathCastToNumber(obj); + xmlXPathFreeObject(obj); + return(ret); +} + +/** + * xmlXPathPopString: + * @ctxt: an XPath parser context + * + * Pops a string from the stack, handling conversion if needed. + * Check error with #xmlXPathCheckError. + * + * Returns the string + */ +xmlChar * +xmlXPathPopString (xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr obj; + xmlChar * ret; + + obj = valuePop(ctxt); + if (obj == NULL) { + xmlXPathSetError(ctxt, XPATH_INVALID_OPERAND); + return(NULL); + } + ret = xmlXPathCastToString(obj); + /* TODO: needs refactoring somewhere else */ + if (obj->stringval == ret) + obj->stringval = NULL; + xmlXPathFreeObject(obj); + return(ret); +} + +/** + * xmlXPathPopNodeSet: + * @ctxt: an XPath parser context + * + * Pops a node-set from the stack, handling conversion if needed. + * Check error with #xmlXPathCheckError. 
+ * + * Returns the node-set + */ +xmlNodeSetPtr +xmlXPathPopNodeSet (xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr obj; + xmlNodeSetPtr ret; + + if (ctxt->value == NULL) { + xmlXPathSetError(ctxt, XPATH_INVALID_OPERAND); + return(NULL); + } + if (!xmlXPathStackIsNodeSet(ctxt)) { + xmlXPathSetTypeError(ctxt); + return(NULL); + } + obj = valuePop(ctxt); + ret = obj->nodesetval; + xmlXPathFreeNodeSetList(obj); + return(ret); +} + +/** + * xmlXPathPopExternal: + * @ctxt: an XPath parser context + * + * Pops an external object from the stack, handling conversion if needed. + * Check error with #xmlXPathCheckError. + * + * Returns the object + */ +void * +xmlXPathPopExternal (xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr obj; + void * ret; + + if (ctxt->value == NULL) { + xmlXPathSetError(ctxt, XPATH_INVALID_OPERAND); + return(NULL); + } + if (ctxt->value->type != XPATH_USERS) { + xmlXPathSetTypeError(ctxt); + return(NULL); + } + obj = valuePop(ctxt); + ret = obj->user; + xmlXPathFreeObject(obj); + return(ret); +} + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one need to make assumption on the context to use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * CUR returns the current xmlChar value, i.e. a 8 bit value + * in ISO-Latin or UTF-8. + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * CURRENT Returns the current char value, with the full decoding of + * UTF-8 if we are using this mode. It returns an int. + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. 
It also pops up unfinished entities on the fly.
 *           It returns the pointer to the current xmlChar.
 */

/*
 * All the macros below expect a variable named ctxt to be in scope at the
 * point of use, and most of them expand to unparenthesized expressions or
 * statements: use them as standalone statements only.
 */
#define CUR (*ctxt->cur)
#define SKIP(val) ctxt->cur += (val)
#define NXT(val) ctxt->cur[(val)]
#define CUR_PTR ctxt->cur
#define CUR_CHAR(l) xmlXPathCurrentChar(ctxt, &l)

/*
 * Appends the character v of encoded length l to buffer b at index i and
 * advances i.  Expands to a bare if/else, so it must not be used as the
 * body of an unbraced if.
 */
#define COPY_BUF(l,b,i,v)                                              \
    if (l == 1) b[i++] = (xmlChar) v;                                  \
    else i += xmlCopyChar(l,&b[i],v)

#define NEXTL(l)  ctxt->cur += l

#define SKIP_BLANKS                                                    \
    while (IS_BLANK(*(ctxt->cur))) NEXT

#define CURRENT (*ctxt->cur)
/* advances by one xmlChar, but never past the terminating 0 */
#define NEXT ((*ctxt->cur) ?  ctxt->cur++: ctxt->cur)


/* fallback values used only when nothing defined these before this point */
#ifndef DBL_DIG
#define DBL_DIG 16
#endif
#ifndef DBL_EPSILON
#define DBL_EPSILON 1E-9
#endif

/*
 * Numbers whose absolute value is above UPPER_DOUBLE or below LOWER_DOUBLE
 * are formatted in scientific notation by xmlXPathFormatNumber().
 */
#define UPPER_DOUBLE 1E9
#define LOWER_DOUBLE 1E-5

#define INTEGER_DIGITS DBL_DIG
#define FRACTION_DIGITS (DBL_DIG + 1)
/* EXPONENT_DIGITS sizes the work buffer of xmlXPathFormatNumber() */
#define EXPONENT_DIGITS (3 + 2)

/**
 * xmlXPathFormatNumber:
 * @number:     number to format
 * @buffer:     output buffer
 * @buffersize: size of output buffer
 *
 * Convert the number into a string representation.
+ */ +static void +xmlXPathFormatNumber(double number, char buffer[], int buffersize) +{ + switch (xmlXPathIsInf(number)) { + case 1: + if (buffersize > (int)sizeof("Infinity")) + snprintf(buffer, buffersize, "Infinity"); + break; + case -1: + if (buffersize > (int)sizeof("-Infinity")) + snprintf(buffer, buffersize, "-Infinity"); + break; + default: + if (xmlXPathIsNaN(number)) { + if (buffersize > (int)sizeof("NaN")) + snprintf(buffer, buffersize, "NaN"); + } else if (number == 0 && xmlXPathGetSign(number) != 0) { + snprintf(buffer, buffersize, "0"); + } else if (number == ((int) number)) { + char work[30]; + char *ptr, *cur; + int res, value = (int) number; + + ptr = &buffer[0]; + if (value < 0) { + *ptr++ = '-'; + value = -value; + } + if (value == 0) { + *ptr++ = '0'; + } else { + cur = &work[0]; + while (value != 0) { + res = value % 10; + value = value / 10; + *cur++ = '0' + res; + } + cur--; + while ((cur >= &work[0]) && (ptr - buffer < buffersize)) { + *ptr++ = *cur--; + } + } + if (ptr - buffer < buffersize) { + *ptr = 0; + } else if (buffersize > 0) { + ptr--; + *ptr = 0; + } + } else { + /* 3 is sign, decimal point, and terminating zero */ + char work[DBL_DIG + EXPONENT_DIGITS + 3]; + int integer_place, fraction_place; + char *ptr; + char *after_fraction; + double absolute_value; + int size; + + absolute_value = fabs(number); + + /* + * First choose format - scientific or regular floating point. + * In either case, result is in work, and after_fraction points + * just past the fractional part. 
+ */ + if ( ((absolute_value > UPPER_DOUBLE) || + (absolute_value < LOWER_DOUBLE)) && + (absolute_value != 0.0) ) { + /* Use scientific notation */ + integer_place = DBL_DIG + EXPONENT_DIGITS + 1; + fraction_place = DBL_DIG - 1; + snprintf(work, sizeof(work),"%*.*e", + integer_place, fraction_place, number); + after_fraction = strchr(work + DBL_DIG, 'e'); + } + else { + /* Use regular notation */ + if (absolute_value > 0.0) + integer_place = 1 + (int)log10(absolute_value); + else + integer_place = 0; + fraction_place = (integer_place > 0) + ? DBL_DIG - integer_place + : DBL_DIG; + size = snprintf(work, sizeof(work), "%0.*f", + fraction_place, number); + after_fraction = work + size; + } + + /* Remove fractional trailing zeroes */ + ptr = after_fraction; + while (*(--ptr) == '0') + ; + if (*ptr != '.') + ptr++; + strcpy(ptr, after_fraction); + + /* Finally copy result back to caller */ + size = strlen(work) + 1; + if (size > buffersize) { + work[buffersize - 1] = 0; + size = buffersize; + } + memcpy(buffer, work, size); + } + break; + } +} + +/************************************************************************ + * * + * Error handling routines * + * * + ************************************************************************/ + + +static const char *xmlXPathErrorMessages[] = { + "Ok", + "Number encoding", + "Unfinished literal", + "Start of literal", + "Expected $ for variable reference", + "Undefined variable", + "Invalid predicate", + "Invalid expression", + "Missing closing curly brace", + "Unregistered function", + "Invalid operand", + "Invalid type", + "Invalid number of arguments", + "Invalid context size", + "Invalid context position", + "Memory allocation error", + "Syntax error", + "Resource error", + "Sub resource error", + "Undefined namespace prefix", + "Encoding error", + "Char out of XML range" +}; + +/** + * xmlXPatherror: + * @ctxt: the XPath Parser context + * @file: the file name + * @line: the line number + * @no: the error number + * + * 
Formats an error message. + */ +void +xmlXPatherror(xmlXPathParserContextPtr ctxt, const char *file ATTRIBUTE_UNUSED, + int line ATTRIBUTE_UNUSED, int no) { + int n; + const xmlChar *cur; + const xmlChar *base; + + cur = ctxt->cur; + base = ctxt->base; + if ((cur == NULL) || (base == NULL)) { + if ((ctxt->comp != NULL) && (ctxt->comp->expr != NULL)) { + xmlGenericError(xmlGenericErrorContext, + "XPath error %s in %s\n", xmlXPathErrorMessages[no], + ctxt->comp->expr); + } else { + xmlGenericError(xmlGenericErrorContext, + "XPath error %s\n", xmlXPathErrorMessages[no]); + } + + return; + } + xmlGenericError(xmlGenericErrorContext, + "XPath error %s\n", xmlXPathErrorMessages[no]); + + while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) { + cur--; + } + n = 0; + while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r')) + cur--; + if ((*cur == '\n') || (*cur == '\r')) cur++; + base = cur; + n = 0; + while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) { + xmlGenericError(xmlGenericErrorContext, "%c", (unsigned char) *cur++); + n++; + } + xmlGenericError(xmlGenericErrorContext, "\n"); + cur = ctxt->cur; + while ((*cur == '\n') || (*cur == '\r')) + cur--; + n = 0; + while ((cur != base) && (n++ < 80)) { + xmlGenericError(xmlGenericErrorContext, " "); + base++; + } + xmlGenericError(xmlGenericErrorContext,"^\n"); +} + + +/************************************************************************ + * * + * Routines to handle NodeSets * + * * + ************************************************************************/ + +/** + * xmlXPathCmpNodes: + * @node1: the first node + * @node2: the second node + * + * Compare two nodes w.r.t document order + * + * Returns -2 in case of error 1 if first point < second point, 0 if + * that's the same node, -1 otherwise + */ +int +xmlXPathCmpNodes(xmlNodePtr node1, xmlNodePtr node2) { + int depth1, depth2; + xmlNodePtr cur, root; + + if ((node1 == NULL) || (node2 == NULL)) + return(-2); + /* + * a couple 
of optimizations which will avoid computations in most cases + */ + if (node1 == node2) + return(0); + if ((node1->type == XML_NAMESPACE_DECL) || + (node2->type == XML_NAMESPACE_DECL)) + return(1); + if (node1 == node2->prev) + return(1); + if (node1 == node2->next) + return(-1); + +#if 0 + Unfortunately this does not work. Line number in entities reset + to 1 within the entity :-( + + /* + * Speedup using line numbers if availble. + */ + if ((node1->type == XML_ELEMENT_NODE) && + (node2->type == XML_ELEMENT_NODE) && + (0 != (int) node1->content) && (0 != (int) node2->content)) { + int l1, l2; + l1 = (int) node1->content; + l2 = (int) node2->content; + if (l1 < l2) + return(1); + if (l1 > l2) + return(-1); + } +#endif + /* + * compute depth to root + */ + for (depth2 = 0, cur = node2;cur->parent != NULL;cur = cur->parent) { + if (cur == node1) + return(1); + depth2++; + } + root = cur; + for (depth1 = 0, cur = node1;cur->parent != NULL;cur = cur->parent) { + if (cur == node2) + return(-1); + depth1++; + } + /* + * Distinct document (or distinct entities :-( ) case. + */ + if (root != cur) { + return(-2); + } + /* + * get the nearest common ancestor. + */ + while (depth1 > depth2) { + depth1--; + node1 = node1->parent; + } + while (depth2 > depth1) { + depth2--; + node2 = node2->parent; + } + while (node1->parent != node2->parent) { + node1 = node1->parent; + node2 = node2->parent; + /* should not happen but just in case ... */ + if ((node1 == NULL) || (node2 == NULL)) + return(-2); + } + /* + * Find who's first. 
+ */ + if (node1 == node2->next) + return(-1); + for (cur = node1->next;cur != NULL;cur = cur->next) + if (cur == node2) + return(1); + return(-1); /* assume there is no sibling list corruption */ +} + +/** + * xmlXPathNodeSetSort: + * @set: the node set + * + * Sort the node set in document order + */ +void +xmlXPathNodeSetSort(xmlNodeSetPtr set) { + int i, j, incr, len; + xmlNodePtr tmp; + + if (set == NULL) + return; + + /* Use Shell's sort to sort the node-set */ + len = set->nodeNr; + for (incr = len / 2; incr > 0; incr /= 2) { + for (i = incr; i < len; i++) { + j = i - incr; + while (j >= 0) { + if (xmlXPathCmpNodes(set->nodeTab[j], + set->nodeTab[j + incr]) == -1) { + tmp = set->nodeTab[j]; + set->nodeTab[j] = set->nodeTab[j + incr]; + set->nodeTab[j + incr] = tmp; + j -= incr; + } else + break; + } + } + } +} + +#define XML_NODESET_DEFAULT 10 +/** + * xmlXPathNodeSetDupNs: + * @node: the parent node of the namespace XPath node + * @ns: the libxml namespace declaration node. + * + * Namespace node in libxml don't match the XPath semantic. In a node set + * the namespace nodes are duplicated and the next pointer is set to the + * parent node in the XPath semantic. + * + * Returns the newly created object. + */ +static xmlNodePtr +xmlXPathNodeSetDupNs(xmlNodePtr node, xmlNsPtr ns) { + xmlNsPtr cur; + + if ((ns == NULL) || (ns->type != XML_NAMESPACE_DECL)) + return(NULL); + if ((node == NULL) || (node->type == XML_NAMESPACE_DECL)) + return((xmlNodePtr) ns); + + /* + * Allocate a new Namespace and fill the fields. 
+ */ + cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetDupNs : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlNs)); + cur->type = XML_NAMESPACE_DECL; + if (ns->href != NULL) + cur->href = xmlStrdup(ns->href); + if (ns->prefix != NULL) + cur->prefix = xmlStrdup(ns->prefix); + cur->next = (xmlNsPtr) node; + return((xmlNodePtr) cur); +} + +/** + * xmlXPathNodeSetFreeNs: + * @ns: the XPath namespace node found in a nodeset. + * + * Namespace node in libxml don't match the XPath semantic. In a node set + * the namespace nodes are duplicated and the next pointer is set to the + * parent node in the XPath semantic. Check if such a node need to be freed + */ +void +xmlXPathNodeSetFreeNs(xmlNsPtr ns) { + if ((ns == NULL) || (ns->type != XML_NAMESPACE_DECL)) + return; + + if ((ns->next != NULL) && (ns->next->type != XML_NAMESPACE_DECL)) { + if (ns->href != NULL) + xmlFree((xmlChar *)ns->href); + if (ns->prefix != NULL) + xmlFree((xmlChar *)ns->prefix); + xmlFree(ns); + } +} + +/** + * xmlXPathNodeSetCreate: + * @val: an initial xmlNodePtr, or NULL + * + * Create a new xmlNodeSetPtr of type double and of value @val + * + * Returns the newly created object. 
+ */ +xmlNodeSetPtr +xmlXPathNodeSetCreate(xmlNodePtr val) { + xmlNodeSetPtr ret; + + ret = (xmlNodeSetPtr) xmlMalloc(sizeof(xmlNodeSet)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetCreate: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlNodeSet)); + if (val != NULL) { + ret->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (ret->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetCreate: out of memory\n"); + return(NULL); + } + memset(ret->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + ret->nodeMax = XML_NODESET_DEFAULT; + if (val->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) val; + + ret->nodeTab[ret->nodeNr++] = + xmlXPathNodeSetDupNs((xmlNodePtr) ns->next, ns); + } else + ret->nodeTab[ret->nodeNr++] = val; + } + return(ret); +} + +/** + * xmlXPathNodeSetContains: + * @cur: the node-set + * @val: the node + * + * checks whether @cur contains @val + * + * Returns true (1) if @cur contains @val, false (0) otherwise + */ +int +xmlXPathNodeSetContains (xmlNodeSetPtr cur, xmlNodePtr val) { + int i; + + if (val->type == XML_NAMESPACE_DECL) { + for (i = 0; i < cur->nodeNr; i++) { + if (cur->nodeTab[i]->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns1, ns2; + + ns1 = (xmlNsPtr) val; + ns2 = (xmlNsPtr) cur->nodeTab[i]; + if (ns1 == ns2) + return(1); + if ((ns1->next != NULL) && (ns2->next == ns1->next) && + (xmlStrEqual(ns1->prefix, ns2->prefix))) + return(1); + } + } + } else { + for (i = 0; i < cur->nodeNr; i++) { + if (cur->nodeTab[i] == val) + return(1); + } + } + return(0); +} + +/** + * xmlXPathNodeSetAddNs: + * @cur: the initial node set + * @node: the hosting node + * @ns: a the namespace node + * + * add a new namespace node to an existing NodeSet + */ +void +xmlXPathNodeSetAddNs(xmlNodeSetPtr cur, xmlNodePtr node, xmlNsPtr ns) { + int i; + + if ((ns == NULL) || (node == NULL) || (ns->type != 
XML_NAMESPACE_DECL) || + (node->type != XML_ELEMENT_NODE)) + return; + + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + /* + * check against doublons + */ + for (i = 0;i < cur->nodeNr;i++) { + if ((cur->nodeTab[i] != NULL) && + (cur->nodeTab[i]->type == XML_NAMESPACE_DECL) && + (((xmlNsPtr)cur->nodeTab[i])->next == (xmlNsPtr) node) && + (xmlStrEqual(ns->prefix, ((xmlNsPtr)cur->nodeTab[i])->prefix))) + return; + } + + /* + * grow the nodeTab if needed + */ + if (cur->nodeMax == 0) { + cur->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (cur->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAdd: out of memory\n"); + return; + } + memset(cur->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + cur->nodeMax = XML_NODESET_DEFAULT; + } else if (cur->nodeNr == cur->nodeMax) { + xmlNodePtr *temp; + + cur->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(cur->nodeTab, cur->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAdd: out of memory\n"); + return; + } + cur->nodeTab = temp; + } + cur->nodeTab[cur->nodeNr++] = xmlXPathNodeSetDupNs(node, ns); +} + +/** + * xmlXPathNodeSetAdd: + * @cur: the initial node set + * @val: a new xmlNodePtr + * + * add a new xmlNodePtr to an existing NodeSet + */ +void +xmlXPathNodeSetAdd(xmlNodeSetPtr cur, xmlNodePtr val) { + int i; + + if (val == NULL) return; + + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + /* + * check against doublons + */ + for (i = 0;i < cur->nodeNr;i++) + if (cur->nodeTab[i] == val) return; + + /* + * grow the nodeTab if needed + */ + if (cur->nodeMax == 0) { + cur->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (cur->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAdd: out of memory\n"); + return; + } + memset(cur->nodeTab, 0 , + 
XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + cur->nodeMax = XML_NODESET_DEFAULT; + } else if (cur->nodeNr == cur->nodeMax) { + xmlNodePtr *temp; + + cur->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(cur->nodeTab, cur->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAdd: out of memory\n"); + return; + } + cur->nodeTab = temp; + } + if (val->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) val; + + cur->nodeTab[cur->nodeNr++] = + xmlXPathNodeSetDupNs((xmlNodePtr) ns->next, ns); + } else + cur->nodeTab[cur->nodeNr++] = val; +} + +/** + * xmlXPathNodeSetAddUnique: + * @cur: the initial node set + * @val: a new xmlNodePtr + * + * add a new xmlNodePtr to an existing NodeSet, optimized version + * when we are sure the node is not already in the set. + */ +void +xmlXPathNodeSetAddUnique(xmlNodeSetPtr cur, xmlNodePtr val) { + if (val == NULL) return; + + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + /* + * grow the nodeTab if needed + */ + if (cur->nodeMax == 0) { + cur->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (cur->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAddUnique: out of memory\n"); + return; + } + memset(cur->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + cur->nodeMax = XML_NODESET_DEFAULT; + } else if (cur->nodeNr == cur->nodeMax) { + xmlNodePtr *temp; + + cur->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(cur->nodeTab, cur->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAddUnique: out of memory\n"); + return; + } + cur->nodeTab = temp; + } + if (val->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) val; + + cur->nodeTab[cur->nodeNr++] = + xmlXPathNodeSetDupNs((xmlNodePtr) ns->next, ns); + } else + cur->nodeTab[cur->nodeNr++] = val; +} + +/** + * xmlXPathNodeSetMerge: + * 
@val1: the first NodeSet or NULL + * @val2: the second NodeSet + * + * Merges two nodesets, all nodes from @val2 are added to @val1 + * if @val1 is NULL, a new set is created and copied from @val2 + * + * Returns @val1 once extended or NULL in case of error. + */ +xmlNodeSetPtr +xmlXPathNodeSetMerge(xmlNodeSetPtr val1, xmlNodeSetPtr val2) { + int i, j, initNr, skip; + + if (val2 == NULL) return(val1); + if (val1 == NULL) { + val1 = xmlXPathNodeSetCreate(NULL); + } + + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + initNr = val1->nodeNr; + + for (i = 0;i < val2->nodeNr;i++) { + /* + * check against doublons + */ + skip = 0; + for (j = 0; j < initNr; j++) { + if (val1->nodeTab[j] == val2->nodeTab[i]) { + skip = 1; + break; + } else if ((val1->nodeTab[j]->type == XML_NAMESPACE_DECL) && + (val2->nodeTab[i]->type == XML_NAMESPACE_DECL)) { + xmlNsPtr ns1, ns2; + ns1 = (xmlNsPtr) val1->nodeTab[j]; + ns2 = (xmlNsPtr) val2->nodeTab[i]; + if ((ns1->next == ns2->next) && + (xmlStrEqual(ns1->prefix, ns2->prefix))) { + skip = 1; + break; + } + } + } + if (skip) + continue; + + /* + * grow the nodeTab if needed + */ + if (val1->nodeMax == 0) { + val1->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (val1->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetMerge: out of memory\n"); + return(NULL); + } + memset(val1->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + val1->nodeMax = XML_NODESET_DEFAULT; + } else if (val1->nodeNr == val1->nodeMax) { + xmlNodePtr *temp; + + val1->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(val1->nodeTab, val1->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetMerge: out of memory\n"); + return(NULL); + } + val1->nodeTab = temp; + } + if (val2->nodeTab[i]->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) val2->nodeTab[i]; + + val1->nodeTab[val1->nodeNr++] = + 
xmlXPathNodeSetDupNs((xmlNodePtr) ns->next, ns); + } else + val1->nodeTab[val1->nodeNr++] = val2->nodeTab[i]; + } + + return(val1); +} + +/** + * xmlXPathNodeSetMergeUnique: + * @val1: the first NodeSet or NULL + * @val2: the second NodeSet + * + * Merges two nodesets, all nodes from @val2 are added to @val1 + * if @val1 is NULL, a new set is created and copied from @val2 + * + * Returns @val1 once extended or NULL in case of error. + */ +static xmlNodeSetPtr +xmlXPathNodeSetMergeUnique(xmlNodeSetPtr val1, xmlNodeSetPtr val2) { + int i, initNr; + + if (val2 == NULL) return(val1); + if (val1 == NULL) { + val1 = xmlXPathNodeSetCreate(NULL); + } + + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + initNr = val1->nodeNr; + + for (i = 0;i < val2->nodeNr;i++) { + /* + * grow the nodeTab if needed + */ + if (val1->nodeMax == 0) { + val1->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (val1->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetMerge: out of memory\n"); + return(NULL); + } + memset(val1->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + val1->nodeMax = XML_NODESET_DEFAULT; + } else if (val1->nodeNr == val1->nodeMax) { + xmlNodePtr *temp; + + val1->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(val1->nodeTab, val1->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetMerge: out of memory\n"); + return(NULL); + } + val1->nodeTab = temp; + } + if (val2->nodeTab[i]->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) val2->nodeTab[i]; + + val1->nodeTab[val1->nodeNr++] = + xmlXPathNodeSetDupNs((xmlNodePtr) ns->next, ns); + } else + val1->nodeTab[val1->nodeNr++] = val2->nodeTab[i]; + } + + return(val1); +} + +/** + * xmlXPathNodeSetDel: + * @cur: the initial node set + * @val: an xmlNodePtr + * + * Removes an xmlNodePtr from an existing NodeSet + */ +void +xmlXPathNodeSetDel(xmlNodeSetPtr 
cur, xmlNodePtr val) { + int i; + + if (cur == NULL) return; + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->nodeNr;i++) + if (cur->nodeTab[i] == val) break; + + if (i >= cur->nodeNr) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetDel: Node %s wasn't found in NodeList\n", + val->name); +#endif + return; + } + if ((cur->nodeTab[i] != NULL) && + (cur->nodeTab[i]->type == XML_NAMESPACE_DECL)) + xmlXPathNodeSetFreeNs((xmlNsPtr) cur->nodeTab[i]); + cur->nodeNr--; + for (;i < cur->nodeNr;i++) + cur->nodeTab[i] = cur->nodeTab[i + 1]; + cur->nodeTab[cur->nodeNr] = NULL; +} + +/** + * xmlXPathNodeSetRemove: + * @cur: the initial node set + * @val: the index to remove + * + * Removes an entry from an existing NodeSet list. + */ +void +xmlXPathNodeSetRemove(xmlNodeSetPtr cur, int val) { + if (cur == NULL) return; + if (val >= cur->nodeNr) return; + if ((cur->nodeTab[val] != NULL) && + (cur->nodeTab[val]->type == XML_NAMESPACE_DECL)) + xmlXPathNodeSetFreeNs((xmlNsPtr) cur->nodeTab[val]); + cur->nodeNr--; + for (;val < cur->nodeNr;val++) + cur->nodeTab[val] = cur->nodeTab[val + 1]; + cur->nodeTab[cur->nodeNr] = NULL; +} + +/** + * xmlXPathFreeNodeSet: + * @obj: the xmlNodeSetPtr to free + * + * Free the NodeSet compound (not the actual nodes !). 
+ */ +void +xmlXPathFreeNodeSet(xmlNodeSetPtr obj) { + if (obj == NULL) return; + if (obj->nodeTab != NULL) { + int i; + + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + for (i = 0;i < obj->nodeNr;i++) + if ((obj->nodeTab[i] != NULL) && + (obj->nodeTab[i]->type == XML_NAMESPACE_DECL)) + xmlXPathNodeSetFreeNs((xmlNsPtr) obj->nodeTab[i]); + xmlFree(obj->nodeTab); + } + xmlFree(obj); +} + +/** + * xmlXPathFreeValueTree: + * @obj: the xmlNodeSetPtr to free + * + * Free the NodeSet compound and the actual tree, this is different + * from xmlXPathFreeNodeSet() + */ +static void +xmlXPathFreeValueTree(xmlNodeSetPtr obj) { + int i; + + if (obj == NULL) return; + + if (obj->nodeTab != NULL) { + for (i = 0;i < obj->nodeNr;i++) { + if (obj->nodeTab[i] != NULL) { + if (obj->nodeTab[i]->type == XML_NAMESPACE_DECL) { + xmlXPathNodeSetFreeNs((xmlNsPtr) obj->nodeTab[i]); + } else { + xmlFreeNodeList(obj->nodeTab[i]); + } + } + } + xmlFree(obj->nodeTab); + } + xmlFree(obj); +} + +#if defined(DEBUG) || defined(DEBUG_STEP) +/** + * xmlGenericErrorContextNodeSet: + * @output: a FILE * for the output + * @obj: the xmlNodeSetPtr to free + * + * Quick display of a NodeSet + */ +void +xmlGenericErrorContextNodeSet(FILE *output, xmlNodeSetPtr obj) { + int i; + + if (output == NULL) output = xmlGenericErrorContext; + if (obj == NULL) { + fprintf(output, "NodeSet == NULL !\n"); + return; + } + if (obj->nodeNr == 0) { + fprintf(output, "NodeSet is empty\n"); + return; + } + if (obj->nodeTab == NULL) { + fprintf(output, " nodeTab == NULL !\n"); + return; + } + for (i = 0; i < obj->nodeNr; i++) { + if (obj->nodeTab[i] == NULL) { + fprintf(output, " NULL !\n"); + return; + } + if ((obj->nodeTab[i]->type == XML_DOCUMENT_NODE) || + (obj->nodeTab[i]->type == XML_HTML_DOCUMENT_NODE)) + fprintf(output, " /"); + else if (obj->nodeTab[i]->name == NULL) + fprintf(output, " noname!"); + else fprintf(output, " %s", obj->nodeTab[i]->name); + } + fprintf(output, "\n"); +} +#endif 
+ +/** + * xmlXPathNewNodeSet: + * @val: the NodePtr value + * + * Create a new xmlXPathObjectPtr of type NodeSet and initialize + * it with the single Node @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewNodeSet(xmlNodePtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_NODESET; + ret->boolval = 0; + ret->nodesetval = xmlXPathNodeSetCreate(val); + /* @@ with_ns to check wether namespace nodes should be looked at @@ */ + return(ret); +} + +/** + * xmlXPathNewValueTree: + * @val: the NodePtr value + * + * Create a new xmlXPathObjectPtr of type Value Tree (XSLT) and initialize + * it with the tree root @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewValueTree(xmlNodePtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_XSLT_TREE; + ret->boolval = 1; + ret->user = (void *) val; + ret->nodesetval = xmlXPathNodeSetCreate(val); + return(ret); +} + +/** + * xmlXPathNewNodeSetList: + * @val: an existing NodeSet + * + * Create a new xmlXPathObjectPtr of type NodeSet and initialize + * it with the Nodeset @val + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPathNewNodeSetList(xmlNodeSetPtr val) +{ + xmlXPathObjectPtr ret; + int i; + + if (val == NULL) + ret = NULL; + else if (val->nodeTab == NULL) + ret = xmlXPathNewNodeSet(NULL); + else { + ret = xmlXPathNewNodeSet(val->nodeTab[0]); + for (i = 1; i < val->nodeNr; ++i) + xmlXPathNodeSetAddUnique(ret->nodesetval, val->nodeTab[i]); + } + + return (ret); +} + +/** + * xmlXPathWrapNodeSet: + * @val: the NodePtr value + * + * Wrap the Nodeset @val in a new xmlXPathObjectPtr + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathWrapNodeSet(xmlNodeSetPtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathWrapNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_NODESET; + ret->nodesetval = val; + return(ret); +} + +/** + * xmlXPathFreeNodeSetList: + * @obj: an existing NodeSetList object + * + * Free up the xmlXPathObjectPtr @obj but don't deallocate the objects in + * the list contrary to xmlXPathFreeObject(). 
+ */ +void +xmlXPathFreeNodeSetList(xmlXPathObjectPtr obj) { + if (obj == NULL) return; + xmlFree(obj); +} + +/** + * xmlXPathDifference: + * @nodes1: a node-set + * @nodes2: a node-set + * + * Implements the EXSLT - Sets difference() function: + * node-set set:difference (node-set, node-set) + * + * Returns the difference between the two node sets, or nodes1 if + * nodes2 is empty + */ +xmlNodeSetPtr +xmlXPathDifference (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + xmlNodeSetPtr ret; + int i, l1; + xmlNodePtr cur; + + if (xmlXPathNodeSetIsEmpty(nodes2)) + return(nodes1); + + ret = xmlXPathNodeSetCreate(NULL); + if (xmlXPathNodeSetIsEmpty(nodes1)) + return(ret); + + l1 = xmlXPathNodeSetGetLength(nodes1); + + for (i = 0; i < l1; i++) { + cur = xmlXPathNodeSetItem(nodes1, i); + if (!xmlXPathNodeSetContains(nodes2, cur)) + xmlXPathNodeSetAddUnique(ret, cur); + } + return(ret); +} + +/** + * xmlXPathIntersection: + * @nodes1: a node-set + * @nodes2: a node-set + * + * Implements the EXSLT - Sets intersection() function: + * node-set set:intersection (node-set, node-set) + * + * Returns a node set comprising the nodes that are within both the + * node sets passed as arguments + */ +xmlNodeSetPtr +xmlXPathIntersection (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + xmlNodeSetPtr ret = xmlXPathNodeSetCreate(NULL); + int i, l1; + xmlNodePtr cur; + + if (xmlXPathNodeSetIsEmpty(nodes1)) + return(ret); + if (xmlXPathNodeSetIsEmpty(nodes2)) + return(ret); + + l1 = xmlXPathNodeSetGetLength(nodes1); + + for (i = 0; i < l1; i++) { + cur = xmlXPathNodeSetItem(nodes1, i); + if (xmlXPathNodeSetContains(nodes2, cur)) + xmlXPathNodeSetAddUnique(ret, cur); + } + return(ret); +} + +/** + * xmlXPathDistinctSorted: + * @nodes: a node-set, sorted by document order + * + * Implements the EXSLT - Sets distinct() function: + * node-set set:distinct (node-set) + * + * Returns a subset of the nodes contained in @nodes, or @nodes if + * it is empty + */ +xmlNodeSetPtr +xmlXPathDistinctSorted 
(xmlNodeSetPtr nodes) { + xmlNodeSetPtr ret; + xmlHashTablePtr hash; + int i, l; + xmlChar * strval; + xmlNodePtr cur; + + if (xmlXPathNodeSetIsEmpty(nodes)) + return(nodes); + + ret = xmlXPathNodeSetCreate(NULL); + l = xmlXPathNodeSetGetLength(nodes); + hash = xmlHashCreate (l); + for (i = 0; i < l; i++) { + cur = xmlXPathNodeSetItem(nodes, i); + strval = xmlXPathCastNodeToString(cur); + if (xmlHashLookup(hash, strval) == NULL) { + xmlHashAddEntry(hash, strval, strval); + xmlXPathNodeSetAddUnique(ret, cur); + } else { + xmlFree(strval); + } + } + xmlHashFree(hash, (xmlHashDeallocator) xmlFree); + return(ret); +} + +/** + * xmlXPathDistinct: + * @nodes: a node-set + * + * Implements the EXSLT - Sets distinct() function: + * node-set set:distinct (node-set) + * @nodes is sorted by document order, then #exslSetsDistinctSorted + * is called with the sorted node-set + * + * Returns a subset of the nodes contained in @nodes, or @nodes if + * it is empty + */ +xmlNodeSetPtr +xmlXPathDistinct (xmlNodeSetPtr nodes) { + if (xmlXPathNodeSetIsEmpty(nodes)) + return(nodes); + + xmlXPathNodeSetSort(nodes); + return(xmlXPathDistinctSorted(nodes)); +} + +/** + * xmlXPathHasSameNodes: + * @nodes1: a node-set + * @nodes2: a node-set + * + * Implements the EXSLT - Sets has-same-nodes function: + * boolean set:has-same-node(node-set, node-set) + * + * Returns true (1) if @nodes1 shares any node with @nodes2, false (0) + * otherwise + */ +int +xmlXPathHasSameNodes (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + int i, l; + xmlNodePtr cur; + + if (xmlXPathNodeSetIsEmpty(nodes1) || + xmlXPathNodeSetIsEmpty(nodes2)) + return(0); + + l = xmlXPathNodeSetGetLength(nodes1); + for (i = 0; i < l; i++) { + cur = xmlXPathNodeSetItem(nodes1, i); + if (xmlXPathNodeSetContains(nodes2, cur)) + return(1); + } + return(0); +} + +/** + * xmlXPathNodeLeadingSorted: + * @nodes: a node-set, sorted by document order + * @node: a node + * + * Implements the EXSLT - Sets leading() function: + * node-set 
set:leading (node-set, node-set) + * + * Returns the nodes in @nodes that precede @node in document order, + * @nodes if @node is NULL or an empty node-set if @nodes + * doesn't contain @node + */ +xmlNodeSetPtr +xmlXPathNodeLeadingSorted (xmlNodeSetPtr nodes, xmlNodePtr node) { + int i, l; + xmlNodePtr cur; + xmlNodeSetPtr ret; + + if (node == NULL) + return(nodes); + + ret = xmlXPathNodeSetCreate(NULL); + if (xmlXPathNodeSetIsEmpty(nodes) || + (!xmlXPathNodeSetContains(nodes, node))) + return(ret); + + l = xmlXPathNodeSetGetLength(nodes); + for (i = 0; i < l; i++) { + cur = xmlXPathNodeSetItem(nodes, i); + if (cur == node) + break; + xmlXPathNodeSetAddUnique(ret, cur); + } + return(ret); +} + +/** + * xmlXPathNodeLeading: + * @nodes: a node-set + * @node: a node + * + * Implements the EXSLT - Sets leading() function: + * node-set set:leading (node-set, node-set) + * @nodes is sorted by document order, then #exslSetsNodeLeadingSorted + * is called. + * + * Returns the nodes in @nodes that precede @node in document order, + * @nodes if @node is NULL or an empty node-set if @nodes + * doesn't contain @node + */ +xmlNodeSetPtr +xmlXPathNodeLeading (xmlNodeSetPtr nodes, xmlNodePtr node) { + xmlXPathNodeSetSort(nodes); + return(xmlXPathNodeLeadingSorted(nodes, node)); +} + +/** + * xmlXPathLeadingSorted: + * @nodes1: a node-set, sorted by document order + * @nodes2: a node-set, sorted by document order + * + * Implements the EXSLT - Sets leading() function: + * node-set set:leading (node-set, node-set) + * + * Returns the nodes in @nodes1 that precede the first node in @nodes2 + * in document order, @nodes1 if @nodes2 is NULL or empty or + * an empty node-set if @nodes1 doesn't contain @nodes2 + */ +xmlNodeSetPtr +xmlXPathLeadingSorted (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + if (xmlXPathNodeSetIsEmpty(nodes2)) + return(nodes1); + return(xmlXPathNodeLeadingSorted(nodes1, + xmlXPathNodeSetItem(nodes2, 1))); +} + +/** + * xmlXPathLeading: + * @nodes1: a node-set 
+ * @nodes2: a node-set + * + * Implements the EXSLT - Sets leading() function: + * node-set set:leading (node-set, node-set) + * @nodes1 and @nodes2 are sorted by document order, then + * #exslSetsLeadingSorted is called. + * + * Returns the nodes in @nodes1 that precede the first node in @nodes2 + * in document order, @nodes1 if @nodes2 is NULL or empty or + * an empty node-set if @nodes1 doesn't contain @nodes2 + */ +xmlNodeSetPtr +xmlXPathLeading (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + if (xmlXPathNodeSetIsEmpty(nodes2)) + return(nodes1); + if (xmlXPathNodeSetIsEmpty(nodes1)) + return(xmlXPathNodeSetCreate(NULL)); + xmlXPathNodeSetSort(nodes1); + xmlXPathNodeSetSort(nodes2); + return(xmlXPathNodeLeadingSorted(nodes1, + xmlXPathNodeSetItem(nodes2, 1))); +} + +/** + * xmlXPathNodeTrailingSorted: + * @nodes: a node-set, sorted by document order + * @node: a node + * + * Implements the EXSLT - Sets trailing() function: + * node-set set:trailing (node-set, node-set) + * + * Returns the nodes in @nodes that follow @node in document order, + * @nodes if @node is NULL or an empty node-set if @nodes + * doesn't contain @node + */ +xmlNodeSetPtr +xmlXPathNodeTrailingSorted (xmlNodeSetPtr nodes, xmlNodePtr node) { + int i, l; + xmlNodePtr cur; + xmlNodeSetPtr ret; + + if (node == NULL) + return(nodes); + + ret = xmlXPathNodeSetCreate(NULL); + if (xmlXPathNodeSetIsEmpty(nodes) || + (!xmlXPathNodeSetContains(nodes, node))) + return(ret); + + l = xmlXPathNodeSetGetLength(nodes); + for (i = l; i > 0; i--) { + cur = xmlXPathNodeSetItem(nodes, i); + if (cur == node) + break; + xmlXPathNodeSetAddUnique(ret, cur); + } + return(ret); +} + +/** + * xmlXPathNodeTrailing: + * @nodes: a node-set + * @node: a node + * + * Implements the EXSLT - Sets trailing() function: + * node-set set:trailing (node-set, node-set) + * @nodes is sorted by document order, then #xmlXPathNodeTrailingSorted + * is called. 
+ * + * Returns the nodes in @nodes that follow @node in document order, + * @nodes if @node is NULL or an empty node-set if @nodes + * doesn't contain @node + */ +xmlNodeSetPtr +xmlXPathNodeTrailing (xmlNodeSetPtr nodes, xmlNodePtr node) { + xmlXPathNodeSetSort(nodes); + return(xmlXPathNodeTrailingSorted(nodes, node)); +} + +/** + * xmlXPathTrailingSorted: + * @nodes1: a node-set, sorted by document order + * @nodes2: a node-set, sorted by document order + * + * Implements the EXSLT - Sets trailing() function: + * node-set set:trailing (node-set, node-set) + * + * Returns the nodes in @nodes1 that follow the first node in @nodes2 + * in document order, @nodes1 if @nodes2 is NULL or empty or + * an empty node-set if @nodes1 doesn't contain @nodes2 + */ +xmlNodeSetPtr +xmlXPathTrailingSorted (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + if (xmlXPathNodeSetIsEmpty(nodes2)) + return(nodes1); + return(xmlXPathNodeTrailingSorted(nodes1, + xmlXPathNodeSetItem(nodes2, 0))); +} + +/** + * xmlXPathTrailing: + * @nodes1: a node-set + * @nodes2: a node-set + * + * Implements the EXSLT - Sets trailing() function: + * node-set set:trailing (node-set, node-set) + * @nodes1 and @nodes2 are sorted by document order, then + * #xmlXPathTrailingSorted is called. 
+ * + * Returns the nodes in @nodes1 that follow the first node in @nodes2 + * in document order, @nodes1 if @nodes2 is NULL or empty or + * an empty node-set if @nodes1 doesn't contain @nodes2 + */ +xmlNodeSetPtr +xmlXPathTrailing (xmlNodeSetPtr nodes1, xmlNodeSetPtr nodes2) { + if (xmlXPathNodeSetIsEmpty(nodes2)) + return(nodes1); + if (xmlXPathNodeSetIsEmpty(nodes1)) + return(xmlXPathNodeSetCreate(NULL)); + xmlXPathNodeSetSort(nodes1); + xmlXPathNodeSetSort(nodes2); + return(xmlXPathNodeTrailingSorted(nodes1, + xmlXPathNodeSetItem(nodes2, 0))); +} + +/************************************************************************ + * * + * Routines to handle extra functions * + * * + ************************************************************************/ + +/** + * xmlXPathRegisterFunc: + * @ctxt: the XPath context + * @name: the function name + * @f: the function implementation or NULL + * + * Register a new function. If @f is NULL it unregisters the function + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterFunc(xmlXPathContextPtr ctxt, const xmlChar *name, + xmlXPathFunction f) { + return(xmlXPathRegisterFuncNS(ctxt, name, NULL, f)); +} + +/** + * xmlXPathRegisterFuncNS: + * @ctxt: the XPath context + * @name: the function name + * @ns_uri: the function namespace URI + * @f: the function implementation or NULL + * + * Register a new function. 
If @f is NULL it unregisters the function + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterFuncNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri, xmlXPathFunction f) { + if (ctxt == NULL) + return(-1); + if (name == NULL) + return(-1); + + if (ctxt->funcHash == NULL) + ctxt->funcHash = xmlHashCreate(0); + if (ctxt->funcHash == NULL) + return(-1); + return(xmlHashAddEntry2(ctxt->funcHash, name, ns_uri, (void *) f)); +} + +/** + * xmlXPathRegisterFuncLookup: + * @ctxt: the XPath context + * @f: the lookup function + * @funcCtxt: the lookup data + * + * Registers an external mechanism to do function lookup. + */ +void +xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt, + xmlXPathFuncLookupFunc f, + void *funcCtxt) { + if (ctxt == NULL) + return; + ctxt->funcLookupFunc = (void *) f; + ctxt->funcLookupData = funcCtxt; +} + +/** + * xmlXPathFunctionLookup: + * @ctxt: the XPath context + * @name: the function name + * + * Search in the Function array of the context for the given + * function. + * + * Returns the xmlXPathFunction or NULL if not found + */ +xmlXPathFunction +xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { + if (ctxt == NULL) + return (NULL); + + if (ctxt->funcLookupFunc != NULL) { + xmlXPathFunction ret; + xmlXPathFuncLookupFunc f; + + f = (xmlXPathFuncLookupFunc) ctxt->funcLookupFunc; + ret = f(ctxt->funcLookupData, name, NULL); + if (ret != NULL) + return(ret); + } + return(xmlXPathFunctionLookupNS(ctxt, name, NULL)); +} + +/** + * xmlXPathFunctionLookupNS: + * @ctxt: the XPath context + * @name: the function name + * @ns_uri: the function namespace URI + * + * Search in the Function array of the context for the given + * function. 
+ * + * Returns the xmlXPathFunction or NULL if not found + */ +xmlXPathFunction +xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri) { + if (ctxt == NULL) + return(NULL); + if (name == NULL) + return(NULL); + + if (ctxt->funcLookupFunc != NULL) { + xmlXPathFunction ret; + xmlXPathFuncLookupFunc f; + + f = (xmlXPathFuncLookupFunc) ctxt->funcLookupFunc; + ret = f(ctxt->funcLookupData, name, ns_uri); + if (ret != NULL) + return(ret); + } + + if (ctxt->funcHash == NULL) + return(NULL); + + return((xmlXPathFunction) xmlHashLookup2(ctxt->funcHash, name, ns_uri)); +} + +/** + * xmlXPathRegisteredFuncsCleanup: + * @ctxt: the XPath context + * + * Cleanup the XPath context data associated to registered functions + */ +void +xmlXPathRegisteredFuncsCleanup(xmlXPathContextPtr ctxt) { + if (ctxt == NULL) + return; + + xmlHashFree(ctxt->funcHash, NULL); + ctxt->funcHash = NULL; +} + +/************************************************************************ + * * + * Routines to handle Variable * + * * + ************************************************************************/ + +/** + * xmlXPathRegisterVariable: + * @ctxt: the XPath context + * @name: the variable name + * @value: the variable value or NULL + * + * Register a new variable value. If @value is NULL it unregisters + * the variable + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterVariable(xmlXPathContextPtr ctxt, const xmlChar *name, + xmlXPathObjectPtr value) { + return(xmlXPathRegisterVariableNS(ctxt, name, NULL, value)); +} + +/** + * xmlXPathRegisterVariableNS: + * @ctxt: the XPath context + * @name: the variable name + * @ns_uri: the variable namespace URI + * @value: the variable value or NULL + * + * Register a new variable value. 
If @value is NULL it unregisters + * the variable + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterVariableNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathObjectPtr value) { + if (ctxt == NULL) + return(-1); + if (name == NULL) + return(-1); + + if (ctxt->varHash == NULL) + ctxt->varHash = xmlHashCreate(0); + if (ctxt->varHash == NULL) + return(-1); + return(xmlHashUpdateEntry2(ctxt->varHash, name, ns_uri, + (void *) value, + (xmlHashDeallocator)xmlXPathFreeObject)); +} + +/** + * xmlXPathRegisterVariableLookup: + * @ctxt: the XPath context + * @f: the lookup function + * @data: the lookup data + * + * register an external mechanism to do variable lookup + */ +void +xmlXPathRegisterVariableLookup(xmlXPathContextPtr ctxt, + xmlXPathVariableLookupFunc f, void *data) { + if (ctxt == NULL) + return; + ctxt->varLookupFunc = (void *) f; + ctxt->varLookupData = data; +} + +/** + * xmlXPathVariableLookup: + * @ctxt: the XPath context + * @name: the variable name + * + * Search in the Variable array of the context for the given + * variable value. + * + * Returns a copy of the value or NULL if not found + */ +xmlXPathObjectPtr +xmlXPathVariableLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { + if (ctxt == NULL) + return(NULL); + + if (ctxt->varLookupFunc != NULL) { + xmlXPathObjectPtr ret; + + ret = ((xmlXPathVariableLookupFunc)ctxt->varLookupFunc) + (ctxt->varLookupData, name, NULL); + return(ret); + } + return(xmlXPathVariableLookupNS(ctxt, name, NULL)); +} + +/** + * xmlXPathVariableLookupNS: + * @ctxt: the XPath context + * @name: the variable name + * @ns_uri: the variable namespace URI + * + * Search in the Variable array of the context for the given + * variable value. 
+ * + * Returns the a copy of the value or NULL if not found + */ +xmlXPathObjectPtr +xmlXPathVariableLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri) { + if (ctxt == NULL) + return(NULL); + + if (ctxt->varLookupFunc != NULL) { + xmlXPathObjectPtr ret; + + ret = ((xmlXPathVariableLookupFunc)ctxt->varLookupFunc) + (ctxt->varLookupData, name, ns_uri); + if (ret != NULL) return(ret); + } + + if (ctxt->varHash == NULL) + return(NULL); + if (name == NULL) + return(NULL); + + return(xmlXPathObjectCopy((xmlXPathObjectPtr) + xmlHashLookup2(ctxt->varHash, name, ns_uri))); +} + +/** + * xmlXPathRegisteredVariablesCleanup: + * @ctxt: the XPath context + * + * Cleanup the XPath context data associated to registered variables + */ +void +xmlXPathRegisteredVariablesCleanup(xmlXPathContextPtr ctxt) { + if (ctxt == NULL) + return; + + xmlHashFree(ctxt->varHash, (xmlHashDeallocator)xmlXPathFreeObject); + ctxt->varHash = NULL; +} + +/** + * xmlXPathRegisterNs: + * @ctxt: the XPath context + * @prefix: the namespace prefix + * @ns_uri: the namespace name + * + * Register a new namespace. 
If @ns_uri is NULL it unregisters + * the namespace + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterNs(xmlXPathContextPtr ctxt, const xmlChar *prefix, + const xmlChar *ns_uri) { + if (ctxt == NULL) + return(-1); + if (prefix == NULL) + return(-1); + + if (ctxt->nsHash == NULL) + ctxt->nsHash = xmlHashCreate(10); + if (ctxt->nsHash == NULL) + return(-1); + return(xmlHashUpdateEntry(ctxt->nsHash, prefix, (void *) xmlStrdup(ns_uri), + (xmlHashDeallocator)xmlFree)); +} + +/** + * xmlXPathNsLookup: + * @ctxt: the XPath context + * @prefix: the namespace prefix value + * + * Search in the namespace declaration array of the context for the given + * namespace name associated to the given prefix + * + * Returns the value or NULL if not found + */ +const xmlChar * +xmlXPathNsLookup(xmlXPathContextPtr ctxt, const xmlChar *prefix) { + if (ctxt == NULL) + return(NULL); + if (prefix == NULL) + return(NULL); + +#ifdef XML_XML_NAMESPACE + if (xmlStrEqual(prefix, (const xmlChar *) "xml")) + return(XML_XML_NAMESPACE); +#endif + + if (ctxt->namespaces != NULL) { + int i; + + for (i = 0;i < ctxt->nsNr;i++) { + if ((ctxt->namespaces[i] != NULL) && + (xmlStrEqual(ctxt->namespaces[i]->prefix, prefix))) + return(ctxt->namespaces[i]->href); + } + } + + return((const xmlChar *) xmlHashLookup(ctxt->nsHash, prefix)); +} + +/** + * xmlXPathRegisteredNsCleanup: + * @ctxt: the XPath context + * + * Cleanup the XPath context data associated to registered variables + */ +void +xmlXPathRegisteredNsCleanup(xmlXPathContextPtr ctxt) { + if (ctxt == NULL) + return; + + xmlHashFree(ctxt->nsHash, (xmlHashDeallocator)xmlFree); + ctxt->nsHash = NULL; +} + +/************************************************************************ + * * + * Routines to handle Values * + * * + ************************************************************************/ + +/* Allocations are terrible, one need to optimize all this !!! 
*/ + +/** + * xmlXPathNewFloat: + * @val: the double value + * + * Create a new xmlXPathObjectPtr of type double and of value @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewFloat(double val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewFloat: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_NUMBER; + ret->floatval = val; + return(ret); +} + +/** + * xmlXPathNewBoolean: + * @val: the boolean value + * + * Create a new xmlXPathObjectPtr of type boolean and of value @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewBoolean(int val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewBoolean: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_BOOLEAN; + ret->boolval = (val != 0); + return(ret); +} + +/** + * xmlXPathNewString: + * @val: the xmlChar * value + * + * Create a new xmlXPathObjectPtr of type string and of value @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewString(const xmlChar *val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewString: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_STRING; + if (val != NULL) + ret->stringval = xmlStrdup(val); + else + ret->stringval = xmlStrdup((const xmlChar *)""); + return(ret); +} + +/** + * xmlXPathWrapString: + * @val: the xmlChar * value + * + * Wraps the @val string into an XPath object. + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPathWrapString (xmlChar *val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathWrapString: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_STRING; + ret->stringval = val; + return(ret); +} + +/** + * xmlXPathNewCString: + * @val: the char * value + * + * Create a new xmlXPathObjectPtr of type string and of value @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewCString(const char *val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewCString: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_STRING; + ret->stringval = xmlStrdup(BAD_CAST val); + return(ret); +} + +/** + * xmlXPathWrapCString: + * @val: the char * value + * + * Wraps a string into an XPath object. + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathWrapCString (char * val) { + return(xmlXPathWrapString((xmlChar *)(val))); +} + +/** + * xmlXPathWrapExternal: + * @val: the user data + * + * Wraps the @val data into an XPath object. + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathWrapExternal (void *val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathWrapExternal: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_USERS; + ret->user = val; + return(ret); +} + +/** + * xmlXPathObjectCopy: + * @val: the original object + * + * allocate a new copy of a given object + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPathObjectCopy(xmlXPathObjectPtr val) { + xmlXPathObjectPtr ret; + + if (val == NULL) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathObjectCopy: out of memory\n"); + return(NULL); + } + memcpy(ret, val , (size_t) sizeof(xmlXPathObject)); + switch (val->type) { + case XPATH_BOOLEAN: + case XPATH_NUMBER: + case XPATH_POINT: + case XPATH_RANGE: + break; + case XPATH_STRING: + ret->stringval = xmlStrdup(val->stringval); + break; + case XPATH_XSLT_TREE: + if ((val->nodesetval != NULL) && + (val->nodesetval->nodeTab != NULL)) { + ret->boolval = 1; + ret->user = xmlDocCopyNode(val->nodesetval->nodeTab[0], + val->nodesetval->nodeTab[0]->doc, 1); + ret->nodesetval = xmlXPathNodeSetCreate( + (xmlNodePtr) ret->user); + } else + ret->nodesetval = xmlXPathNodeSetCreate(NULL); + /* Deallocate the copied tree value */ + break; + case XPATH_NODESET: + ret->nodesetval = xmlXPathNodeSetMerge(NULL, val->nodesetval); + /* Do not deallocate the copied tree value */ + ret->boolval = 0; + break; + case XPATH_LOCATIONSET: +#ifdef LIBXML_XPTR_ENABLED + { + xmlLocationSetPtr loc = val->user; + ret->user = (void *) xmlXPtrLocationSetMerge(NULL, loc); + break; + } +#endif + case XPATH_USERS: + ret->user = val->user; + break; + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "xmlXPathObjectCopy: unsupported type %d\n", + val->type); + break; + } + return(ret); +} + +/** + * xmlXPathFreeObject: + * @obj: the object to free + * + * Free up an xmlXPathObjectPtr object. 
+ */ +void +xmlXPathFreeObject(xmlXPathObjectPtr obj) { + if (obj == NULL) return; + if ((obj->type == XPATH_NODESET) || (obj->type == XPATH_XSLT_TREE)) { + if (obj->boolval) { + if (obj->user != NULL) { + xmlXPathFreeNodeSet(obj->nodesetval); + xmlFreeNodeList((xmlNodePtr) obj->user); + } else if (obj->nodesetval != NULL) + xmlXPathFreeValueTree(obj->nodesetval); + } else { + if (obj->nodesetval != NULL) + xmlXPathFreeNodeSet(obj->nodesetval); + } +#ifdef LIBXML_XPTR_ENABLED + } else if (obj->type == XPATH_LOCATIONSET) { + if (obj->user != NULL) + xmlXPtrFreeLocationSet(obj->user); +#endif + } else if (obj->type == XPATH_STRING) { + if (obj->stringval != NULL) + xmlFree(obj->stringval); + } + + xmlFree(obj); +} + + +/************************************************************************ + * * + * Type Casting Routines * + * * + ************************************************************************/ + +/** + * xmlXPathCastBooleanToString: + * @val: a boolean + * + * Converts a boolean to its string value. + * + * Returns a newly allocated string. + */ +xmlChar * +xmlXPathCastBooleanToString (int val) { + xmlChar *ret; + if (val) + ret = xmlStrdup((const xmlChar *) "true"); + else + ret = xmlStrdup((const xmlChar *) "false"); + return(ret); +} + +/** + * xmlXPathCastNumberToString: + * @val: a number + * + * Converts a number to its string value. + * + * Returns a newly allocated string. 
+ */ +xmlChar * +xmlXPathCastNumberToString (double val) { + xmlChar *ret; + switch (xmlXPathIsInf(val)) { + case 1: + ret = xmlStrdup((const xmlChar *) "Infinity"); + break; + case -1: + ret = xmlStrdup((const xmlChar *) "-Infinity"); + break; + default: + if (xmlXPathIsNaN(val)) { + ret = xmlStrdup((const xmlChar *) "NaN"); + } else if (val == 0 && xmlXPathGetSign(val) != 0) { + ret = xmlStrdup((const xmlChar *) "0"); + } else { + /* could be improved */ + char buf[100]; + xmlXPathFormatNumber(val, buf, 100); + ret = xmlStrdup((const xmlChar *) buf); + } + } + return(ret); +} + +/** + * xmlXPathCastNodeToString: + * @node: a node + * + * Converts a node to its string value. + * + * Returns a newly allocated string. + */ +xmlChar * +xmlXPathCastNodeToString (xmlNodePtr node) { + if ((node != NULL) && (node->type == XML_DOCUMENT_NODE)) + node = xmlDocGetRootElement((xmlDocPtr) node); + return(xmlNodeGetContent(node)); +} + +/** + * xmlXPathCastNodeSetToString: + * @ns: a node-set + * + * Converts a node-set to its string value. + * + * Returns a newly allocated string. + */ +xmlChar * +xmlXPathCastNodeSetToString (xmlNodeSetPtr ns) { + if ((ns == NULL) || (ns->nodeNr == 0) || (ns->nodeTab == NULL)) + return(xmlStrdup((const xmlChar *) "")); + + xmlXPathNodeSetSort(ns); + return(xmlXPathCastNodeToString(ns->nodeTab[0])); +} + +/** + * xmlXPathCastToString: + * @val: an XPath object + * + * Converts an existing object to its string() equivalent + * + * Returns the string value of the object, NULL in case of error. + * A new string is allocated only if needed (@val isn't a + * string object). 
+ */ +xmlChar * +xmlXPathCastToString(xmlXPathObjectPtr val) { + xmlChar *ret = NULL; + + if (val == NULL) + return(xmlStrdup((const xmlChar *) "")); + switch (val->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, "String: undefined\n"); +#endif + ret = xmlStrdup((const xmlChar *) ""); + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathCastNodeSetToString(val->nodesetval); + break; + case XPATH_STRING: + return(xmlStrdup(val->stringval)); + case XPATH_BOOLEAN: + ret = xmlXPathCastBooleanToString(val->boolval); + break; + case XPATH_NUMBER: { + ret = xmlXPathCastNumberToString(val->floatval); + break; + } + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + ret = xmlStrdup((const xmlChar *) ""); + break; + } + return(ret); +} + +/** + * xmlXPathConvertString: + * @val: an XPath object + * + * Converts an existing object to its string() equivalent + * + * Returns the new object, the old one is freed (or the operation + * is done directly on @val) + */ +xmlXPathObjectPtr +xmlXPathConvertString(xmlXPathObjectPtr val) { + xmlChar *res = NULL; + + if (val == NULL) + return(xmlXPathNewCString("")); + + switch (val->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, "STRING: undefined\n"); +#endif + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + res = xmlXPathCastNodeSetToString(val->nodesetval); + break; + case XPATH_STRING: + return(val); + case XPATH_BOOLEAN: + res = xmlXPathCastBooleanToString(val->boolval); + break; + case XPATH_NUMBER: + res = xmlXPathCastNumberToString(val->floatval); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO; + break; + } + xmlXPathFreeObject(val); + if (res == NULL) + return(xmlXPathNewCString("")); + return(xmlXPathWrapString(res)); +} + +/** + * xmlXPathCastBooleanToNumber: + * @val: a boolean + * + * Converts a boolean to its number 
value + * + * Returns the number value + */ +double +xmlXPathCastBooleanToNumber(int val) { + if (val) + return(1.0); + return(0.0); +} + +/** + * xmlXPathCastStringToNumber: + * @val: a string + * + * Converts a string to its number value + * + * Returns the number value + */ +double +xmlXPathCastStringToNumber(const xmlChar * val) { + return(xmlXPathStringEvalNumber(val)); +} + +/** + * xmlXPathCastNodeToNumber: + * @node: a node + * + * Converts a node to its number value + * + * Returns the number value + */ +double +xmlXPathCastNodeToNumber (xmlNodePtr node) { + xmlChar *strval; + double ret; + + if (node == NULL) + return(xmlXPathNAN); + strval = xmlXPathCastNodeToString(node); + if (strval == NULL) + return(xmlXPathNAN); + ret = xmlXPathCastStringToNumber(strval); + xmlFree(strval); + + return(ret); +} + +/** + * xmlXPathCastNodeSetToNumber: + * @ns: a node-set + * + * Converts a node-set to its number value + * + * Returns the number value + */ +double +xmlXPathCastNodeSetToNumber (xmlNodeSetPtr ns) { + xmlChar *str; + double ret; + + if (ns == NULL) + return(xmlXPathNAN); + str = xmlXPathCastNodeSetToString(ns); + ret = xmlXPathCastStringToNumber(str); + xmlFree(str); + return(ret); +} + +/** + * xmlXPathCastToNumber: + * @val: an XPath object + * + * Converts an XPath object to its number value + * + * Returns the number value + */ +double +xmlXPathCastToNumber(xmlXPathObjectPtr val) { + double ret = 0.0; + + if (val == NULL) + return(xmlXPathNAN); + switch (val->type) { + case XPATH_UNDEFINED: +#ifdef DEGUB_EXPR + xmlGenericError(xmlGenericErrorContext, "NUMBER: undefined\n"); +#endif + ret = xmlXPathNAN; + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathCastNodeSetToNumber(val->nodesetval); + break; + case XPATH_STRING: + ret = xmlXPathCastStringToNumber(val->stringval); + break; + case XPATH_NUMBER: + ret = val->floatval; + break; + case XPATH_BOOLEAN: + ret = xmlXPathCastBooleanToNumber(val->boolval); + break; + case XPATH_USERS: 
+ case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO; + ret = xmlXPathNAN; + break; + } + return(ret); +} + +/** + * xmlXPathConvertNumber: + * @val: an XPath object + * + * Converts an existing object to its number() equivalent + * + * Returns the new object, the old one is freed (or the operation + * is done directly on @val) + */ +xmlXPathObjectPtr +xmlXPathConvertNumber(xmlXPathObjectPtr val) { + xmlXPathObjectPtr ret; + + if (val == NULL) + return(xmlXPathNewFloat(0.0)); + if (val->type == XPATH_NUMBER) + return(val); + ret = xmlXPathNewFloat(xmlXPathCastToNumber(val)); + xmlXPathFreeObject(val); + return(ret); +} + +/** + * xmlXPathCastNumberToBoolean: + * @val: a number + * + * Converts a number to its boolean value + * + * Returns the boolean value + */ +int +xmlXPathCastNumberToBoolean (double val) { + if (xmlXPathIsNaN(val) || (val == 0.0)) + return(0); + return(1); +} + +/** + * xmlXPathCastStringToBoolean: + * @val: a string + * + * Converts a string to its boolean value + * + * Returns the boolean value + */ +int +xmlXPathCastStringToBoolean (const xmlChar *val) { + if ((val == NULL) || (xmlStrlen(val) == 0)) + return(0); + return(1); +} + +/** + * xmlXPathCastNodeSetToBoolean: + * @ns: a node-set + * + * Converts a node-set to its boolean value + * + * Returns the boolean value + */ +int +xmlXPathCastNodeSetToBoolean (xmlNodeSetPtr ns) { + if ((ns == NULL) || (ns->nodeNr == 0)) + return(0); + return(1); +} + +/** + * xmlXPathCastToBoolean: + * @val: an XPath object + * + * Converts an XPath object to its boolean value + * + * Returns the boolean value + */ +int +xmlXPathCastToBoolean (xmlXPathObjectPtr val) { + int ret = 0; + + if (val == NULL) + return(0); + switch (val->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, "BOOLEAN: undefined\n"); +#endif + ret = 0; + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathCastNodeSetToBoolean(val->nodesetval); + break; + case 
XPATH_STRING: + ret = xmlXPathCastStringToBoolean(val->stringval); + break; + case XPATH_NUMBER: + ret = xmlXPathCastNumberToBoolean(val->floatval); + break; + case XPATH_BOOLEAN: + ret = val->boolval; + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO; + ret = 0; + break; + } + return(ret); +} + + +/** + * xmlXPathConvertBoolean: + * @val: an XPath object + * + * Converts an existing object to its boolean() equivalent + * + * Returns the new object, the old one is freed (or the operation + * is done directly on @val) + */ +xmlXPathObjectPtr +xmlXPathConvertBoolean(xmlXPathObjectPtr val) { + xmlXPathObjectPtr ret; + + if (val == NULL) + return(xmlXPathNewBoolean(0)); + if (val->type == XPATH_BOOLEAN) + return(val); + ret = xmlXPathNewBoolean(xmlXPathCastToBoolean(val)); + xmlXPathFreeObject(val); + return(ret); +} + +/************************************************************************ + * * + * Routines to handle XPath contexts * + * * + ************************************************************************/ + +/** + * xmlXPathNewContext: + * @doc: the XML document + * + * Create a new xmlXPathContext + * + * Returns the xmlXPathContext just allocated. The caller will need to free it. 
+ */ +xmlXPathContextPtr +xmlXPathNewContext(xmlDocPtr doc) { + xmlXPathContextPtr ret; + + ret = (xmlXPathContextPtr) xmlMalloc(sizeof(xmlXPathContext)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewContext: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathContext)); + ret->doc = doc; + ret->node = NULL; + + ret->varHash = NULL; + + ret->nb_types = 0; + ret->max_types = 0; + ret->types = NULL; + + ret->funcHash = xmlHashCreate(0); + + ret->nb_axis = 0; + ret->max_axis = 0; + ret->axis = NULL; + + ret->nsHash = NULL; + ret->user = NULL; + + ret->contextSize = -1; + ret->proximityPosition = -1; + + xmlXPathRegisterAllFunctions(ret); + + return(ret); +} + +/** + * xmlXPathFreeContext: + * @ctxt: the context to free + * + * Free up an xmlXPathContext + */ +void +xmlXPathFreeContext(xmlXPathContextPtr ctxt) { + xmlXPathRegisteredNsCleanup(ctxt); + xmlXPathRegisteredFuncsCleanup(ctxt); + xmlXPathRegisteredVariablesCleanup(ctxt); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * Routines to handle XPath parser contexts * + * * + ************************************************************************/ + +#define CHECK_CTXT(ctxt) \ + if (ctxt == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: ctxt == NULL\n", \ + __FILE__, __LINE__); \ + } \ + + +#define CHECK_CONTEXT(ctxt) \ + if (ctxt == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: no context\n", \ + __FILE__, __LINE__); \ + } \ + else if (ctxt->doc == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: no document\n", \ + __FILE__, __LINE__); \ + } \ + else if (ctxt->doc->children == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: document without root\n", \ + __FILE__, __LINE__); \ + } \ + + +/** + * xmlXPathNewParserContext: + * @str: the XPath expression + * @ctxt: the XPath 
context + * + * Create a new xmlXPathParserContext + * + * Returns the xmlXPathParserContext just allocated. + */ +xmlXPathParserContextPtr +xmlXPathNewParserContext(const xmlChar *str, xmlXPathContextPtr ctxt) { + xmlXPathParserContextPtr ret; + + ret = (xmlXPathParserContextPtr) xmlMalloc(sizeof(xmlXPathParserContext)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewParserContext: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathParserContext)); + ret->cur = ret->base = str; + ret->context = ctxt; + + ret->comp = xmlXPathNewCompExpr(); + if (ret->comp == NULL) { + xmlFree(ret->valueTab); + xmlFree(ret); + return(NULL); + } + + return(ret); +} + +/** + * xmlXPathCompParserContext: + * @comp: the XPath compiled expression + * @ctxt: the XPath context + * + * Create a new xmlXPathParserContext when processing a compiled expression + * + * Returns the xmlXPathParserContext just allocated. + */ +static xmlXPathParserContextPtr +xmlXPathCompParserContext(xmlXPathCompExprPtr comp, xmlXPathContextPtr ctxt) { + xmlXPathParserContextPtr ret; + + ret = (xmlXPathParserContextPtr) xmlMalloc(sizeof(xmlXPathParserContext)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompParserContext: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathParserContext)); + + /* Allocate the value stack */ + ret->valueTab = (xmlXPathObjectPtr *) + xmlMalloc(10 * sizeof(xmlXPathObjectPtr)); + if (ret->valueTab == NULL) { + xmlFree(ret); + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompParserContext: out of memory\n"); + return(NULL); + } + ret->valueNr = 0; + ret->valueMax = 10; + ret->value = NULL; + + ret->context = ctxt; + ret->comp = comp; + + return(ret); +} + +/** + * xmlXPathFreeParserContext: + * @ctxt: the context to free + * + * Free up an xmlXPathParserContext + */ +void +xmlXPathFreeParserContext(xmlXPathParserContextPtr ctxt) { + if (ctxt->valueTab != NULL) 
{ + xmlFree(ctxt->valueTab); + } + if (ctxt->comp) + xmlXPathFreeCompExpr(ctxt->comp); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * The implicit core function library * + * * + ************************************************************************/ + +/** + * xmlXPathNodeValHash: + * @node: a node pointer + * + * Function computing the beginning of the string value of the node, + * used to speed up comparisons + * + * Returns an int usable as a hash + */ +static unsigned int +xmlXPathNodeValHash(xmlNodePtr node) { + int len = 2; + const xmlChar * string = NULL; + xmlNodePtr tmp = NULL; + unsigned int ret = 0; + + if (node == NULL) + return(0); + + + switch (node->type) { + case XML_COMMENT_NODE: + case XML_PI_NODE: + case XML_CDATA_SECTION_NODE: + case XML_TEXT_NODE: + string = node->content; + if (string == NULL) + return(0); + if (string[0] == 0) + return(0); + return(((unsigned int) string[0]) + + (((unsigned int) string[1]) << 8)); + case XML_NAMESPACE_DECL: + string = ((xmlNsPtr)node)->href; + if (string == NULL) + return(0); + if (string[0] == 0) + return(0); + return(((unsigned int) string[0]) + + (((unsigned int) string[1]) << 8)); + case XML_ATTRIBUTE_NODE: + tmp = ((xmlAttrPtr) node)->children; + break; + case XML_ELEMENT_NODE: + tmp = node->children; + break; + default: + return(0); + } + while (tmp != NULL) { + switch (tmp->type) { + case XML_COMMENT_NODE: + case XML_PI_NODE: + case XML_CDATA_SECTION_NODE: + case XML_TEXT_NODE: + string = tmp->content; + break; + case XML_NAMESPACE_DECL: + string = ((xmlNsPtr)tmp)->href; + break; + default: + break; + } + if ((string != NULL) && (string[0] != 0)) { + if (string[0] == 0) + return(0); + if (len == 1) { + return(ret + (((unsigned int) string[0]) << 8)); + } + if (string[1] == 0) { + len = 1; + ret = (unsigned int) string[0]; + } else { + return(((unsigned int) string[0]) + + (((unsigned int) string[1]) << 8)); + } + } + /* + * Skip to next 
node + */ + if ((tmp->children != NULL) && (tmp->type != XML_DTD_NODE)) { + if (tmp->children->type != XML_ENTITY_DECL) { + tmp = tmp->children; + continue; + } + } + if (tmp == node) + break; + + if (tmp->next != NULL) { + tmp = tmp->next; + continue; + } + + do { + tmp = tmp->parent; + if (tmp == NULL) + break; + if (tmp == node) { + tmp = NULL; + break; + } + if (tmp->next != NULL) { + tmp = tmp->next; + break; + } + } while (tmp != NULL); + } + return(ret); +} + +/** + * xmlXPathStringHash: + * @string: a string + * + * Function computing the beginning of the string value of the node, + * used to speed up comparisons + * + * Returns an int usable as a hash + */ +static unsigned int +xmlXPathStringHash(const xmlChar * string) { + if (string == NULL) + return((unsigned int) 0); + if (string[0] == 0) + return(0); + return(((unsigned int) string[0]) + + (((unsigned int) string[1]) << 8)); +} + +/** + * xmlXPathCompareNodeSetFloat: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * @arg: the node set + * @f: the value + * + * Implement the compare operation between a nodeset and a number + * @ns < @val (1, 1, ... + * @ns <= @val (1, 0, ... + * @ns > @val (0, 1, ... + * @ns >= @val (0, 0, ... + * + * If one object to be compared is a node-set and the other is a number, + * then the comparison will be true if and only if there is a node in the + * node-set such that the result of performing the comparison on the number + * to be compared and on the result of converting the string-value of that + * node to a number using the number function is true. + * + * Returns 0 or 1 depending on the results of the test. 
+ */ +static int +xmlXPathCompareNodeSetFloat(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg, xmlXPathObjectPtr f) { + int i, ret = 0; + xmlNodeSetPtr ns; + xmlChar *str2; + + if ((f == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) { + xmlXPathFreeObject(arg); + xmlXPathFreeObject(f); + return(0); + } + ns = arg->nodesetval; + if (ns != NULL) { + for (i = 0;i < ns->nodeNr;i++) { + str2 = xmlXPathCastNodeToString(ns->nodeTab[i]); + if (str2 != NULL) { + valuePush(ctxt, + xmlXPathNewString(str2)); + xmlFree(str2); + xmlXPathNumberFunction(ctxt, 1); + valuePush(ctxt, xmlXPathObjectCopy(f)); + ret = xmlXPathCompareValues(ctxt, inf, strict); + if (ret) + break; + } + } + } + xmlXPathFreeObject(arg); + xmlXPathFreeObject(f); + return(ret); +} + +/** + * xmlXPathCompareNodeSetString: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * @arg: the node set + * @s: the value + * + * Implement the compare operation between a nodeset and a string + * @ns < @val (1, 1, ... + * @ns <= @val (1, 0, ... + * @ns > @val (0, 1, ... + * @ns >= @val (0, 0, ... + * + * If one object to be compared is a node-set and the other is a string, + * then the comparison will be true if and only if there is a node in + * the node-set such that the result of performing the comparison on the + * string-value of the node and the other string is true. + * + * Returns 0 or 1 depending on the results of the test. 
+ */ +static int +xmlXPathCompareNodeSetString(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg, xmlXPathObjectPtr s) { + int i, ret = 0; + xmlNodeSetPtr ns; + xmlChar *str2; + + if ((s == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) { + xmlXPathFreeObject(arg); + xmlXPathFreeObject(s); + return(0); + } + ns = arg->nodesetval; + if (ns != NULL) { + for (i = 0;i < ns->nodeNr;i++) { + str2 = xmlXPathCastNodeToString(ns->nodeTab[i]); + if (str2 != NULL) { + valuePush(ctxt, + xmlXPathNewString(str2)); + xmlFree(str2); + valuePush(ctxt, xmlXPathObjectCopy(s)); + ret = xmlXPathCompareValues(ctxt, inf, strict); + if (ret) + break; + } + } + } + xmlXPathFreeObject(arg); + xmlXPathFreeObject(s); + return(ret); +} + +/** + * xmlXPathCompareNodeSets: + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * @arg1: the first node set object + * @arg2: the second node set object + * + * Implement the compare operation on nodesets: + * + * If both objects to be compared are node-sets, then the comparison + * will be true if and only if there is a node in the first node-set + * and a node in the second node-set such that the result of performing + * the comparison on the string-values of the two nodes is true. + * .... + * When neither object to be compared is a node-set and the operator + * is <=, <, >= or >, then the objects are compared by converting both + * objects to numbers and comparing the numbers according to IEEE 754. + * .... 
+ * The number function converts its argument to a number as follows: + * - a string that consists of optional whitespace followed by an + * optional minus sign followed by a Number followed by whitespace + * is converted to the IEEE 754 number that is nearest (according + * to the IEEE 754 round-to-nearest rule) to the mathematical value + * represented by the string; any other string is converted to NaN + * + * Conclusion all nodes need to be converted first to their string value + * and then the comparison must be done when possible + */ +static int +xmlXPathCompareNodeSets(int inf, int strict, + xmlXPathObjectPtr arg1, xmlXPathObjectPtr arg2) { + int i, j, init = 0; + double val1; + double *values2; + int ret = 0; + xmlNodeSetPtr ns1; + xmlNodeSetPtr ns2; + + if ((arg1 == NULL) || + ((arg1->type != XPATH_NODESET) && (arg1->type != XPATH_XSLT_TREE))) { + xmlXPathFreeObject(arg2); + return(0); + } + if ((arg2 == NULL) || + ((arg2->type != XPATH_NODESET) && (arg2->type != XPATH_XSLT_TREE))) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(0); + } + + ns1 = arg1->nodesetval; + ns2 = arg2->nodesetval; + + if ((ns1 == NULL) || (ns1->nodeNr <= 0)) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(0); + } + if ((ns2 == NULL) || (ns2->nodeNr <= 0)) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(0); + } + + values2 = (double *) xmlMalloc(ns2->nodeNr * sizeof(double)); + if (values2 == NULL) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(0); + } + for (i = 0;i < ns1->nodeNr;i++) { + val1 = xmlXPathCastNodeToNumber(ns1->nodeTab[i]); + if (xmlXPathIsNaN(val1)) + continue; + for (j = 0;j < ns2->nodeNr;j++) { + if (init == 0) { + values2[j] = xmlXPathCastNodeToNumber(ns2->nodeTab[j]); + } + if (xmlXPathIsNaN(values2[j])) + continue; + if (inf && strict) + ret = (val1 < values2[j]); + else if (inf && !strict) + ret = (val1 <= values2[j]); + else if (!inf && strict) + ret = (val1 > values2[j]); + else if 
(!inf && !strict) + ret = (val1 >= values2[j]); + if (ret) + break; + } + if (ret) + break; + init = 1; + } + xmlFree(values2); + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); +} + +/** + * xmlXPathCompareNodeSetValue: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * @arg: the node set + * @val: the value + * + * Implement the compare operation between a nodeset and a value + * @ns < @val (1, 1, ... + * @ns <= @val (1, 0, ... + * @ns > @val (0, 1, ... + * @ns >= @val (0, 0, ... + * + * If one object to be compared is a node-set and the other is a boolean, + * then the comparison will be true if and only if the result of performing + * the comparison on the boolean and on the result of converting + * the node-set to a boolean using the boolean function is true. + * + * Returns 0 or 1 depending on the results of the test. + */ +static int +xmlXPathCompareNodeSetValue(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg, xmlXPathObjectPtr val) { + if ((val == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) + return(0); + + switch(val->type) { + case XPATH_NUMBER: + return(xmlXPathCompareNodeSetFloat(ctxt, inf, strict, arg, val)); + case XPATH_NODESET: + case XPATH_XSLT_TREE: + return(xmlXPathCompareNodeSets(inf, strict, arg, val)); + case XPATH_STRING: + return(xmlXPathCompareNodeSetString(ctxt, inf, strict, arg, val)); + case XPATH_BOOLEAN: + valuePush(ctxt, arg); + xmlXPathBooleanFunction(ctxt, 1); + valuePush(ctxt, val); + return(xmlXPathCompareValues(ctxt, inf, strict)); + default: + TODO + } + return(0); +} + +/** + * xmlXPathEqualNodeSetString: + * @arg: the nodeset object argument + * @str: the string to compare to. 
+ * @neq: flag to show whether for '=' (0) or '!=' (1) + * + * Implement the equal operation on XPath objects content: @arg1 == @arg2 + * If one object to be compared is a node-set and the other is a string, + * then the comparison will be true if and only if there is a node in + * the node-set such that the result of performing the comparison on the + * string-value of the node and the other string is true. + * + * Returns 0 or 1 depending on the results of the test. + */ +static int +xmlXPathEqualNodeSetString(xmlXPathObjectPtr arg, const xmlChar * str, int neq) +{ + int i; + xmlNodeSetPtr ns; + xmlChar *str2; + unsigned int hash; + + if ((str == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) + return (0); + ns = arg->nodesetval; + hash = xmlXPathStringHash(str); + if (ns == NULL) + return (0); + if (ns->nodeNr <= 0) { + if (hash == 0) + return(neq ^ 1); + return(neq); + } + for (i = 0; i < ns->nodeNr; i++) { + if (xmlXPathNodeValHash(ns->nodeTab[i]) == hash) { + str2 = xmlNodeGetContent(ns->nodeTab[i]); + if ((str2 != NULL) && (xmlStrEqual(str, str2))) { + xmlFree(str2); + if (neq) + continue; + return (1); + } else if (neq) { + if (str2 != NULL) + xmlFree(str2); + return (1); + } + if (str2 != NULL) + xmlFree(str2); + } else if (neq) + return (1); + } + return (0); +} + +/** + * xmlXPathEqualNodeSetFloat: + * @arg: the nodeset object argument + * @f: the float to compare to + * @neq: flag to show whether to compare '=' (0) or '!=' (1) + * + * Implement the equal operation on XPath objects content: @arg1 == @arg2 + * If one object to be compared is a node-set and the other is a number, + * then the comparison will be true if and only if there is a node in + * the node-set such that the result of performing the comparison on the + * number to be compared and on the result of converting the string-value + * of that node to a number using the number function is true. 
+ * + * Returns 0 or 1 depending on the results of the test. + */ +static int +xmlXPathEqualNodeSetFloat(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr arg, double f, int neq) { + int i, ret=0; + xmlNodeSetPtr ns; + xmlChar *str2; + xmlXPathObjectPtr val; + double v; + + if ((arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) + return(0); + + ns = arg->nodesetval; + if (ns != NULL) { + for (i=0;i<ns->nodeNr;i++) { + str2 = xmlXPathCastNodeToString(ns->nodeTab[i]); + if (str2 != NULL) { + valuePush(ctxt, xmlXPathNewString(str2)); + xmlFree(str2); + xmlXPathNumberFunction(ctxt, 1); + val = valuePop(ctxt); + v = val->floatval; + xmlXPathFreeObject(val); + if (!xmlXPathIsNaN(v)) { + if ((!neq) && (v==f)) { + ret = 1; + break; + } else if ((neq) && (v!=f)) { + ret = 1; + break; + } + } + } + } + } + + return(ret); +} + + +/** + * xmlXPathEqualNodeSets: + * @arg1: first nodeset object argument + * @arg2: second nodeset object argument + * @neq: flag to show whether to test '=' (0) or '!=' (1) + * + * Implement the equal / not equal operation on XPath nodesets: + * @arg1 == @arg2 or @arg1 != @arg2 + * If both objects to be compared are node-sets, then the comparison + * will be true if and only if there is a node in the first node-set and + * a node in the second node-set such that the result of performing the + * comparison on the string-values of the two nodes is true. + * + * (needless to say, this is a costly operation) + * + * Returns 0 or 1 depending on the results of the test. 
+ */ +static int +xmlXPathEqualNodeSets(xmlXPathObjectPtr arg1, xmlXPathObjectPtr arg2, int neq) { + int i, j; + unsigned int *hashs1; + unsigned int *hashs2; + xmlChar **values1; + xmlChar **values2; + int ret = 0; + xmlNodeSetPtr ns1; + xmlNodeSetPtr ns2; + + if ((arg1 == NULL) || + ((arg1->type != XPATH_NODESET) && (arg1->type != XPATH_XSLT_TREE))) + return(0); + if ((arg2 == NULL) || + ((arg2->type != XPATH_NODESET) && (arg2->type != XPATH_XSLT_TREE))) + return(0); + + ns1 = arg1->nodesetval; + ns2 = arg2->nodesetval; + + if ((ns1 == NULL) || (ns1->nodeNr <= 0)) + return(0); + if ((ns2 == NULL) || (ns2->nodeNr <= 0)) + return(0); + + /* + * for equal, check if there is a node pertaining to both sets + */ + if (neq == 0) + for (i = 0;i < ns1->nodeNr;i++) + for (j = 0;j < ns2->nodeNr;j++) + if (ns1->nodeTab[i] == ns2->nodeTab[j]) + return(1); + + values1 = (xmlChar **) xmlMalloc(ns1->nodeNr * sizeof(xmlChar *)); + if (values1 == NULL) + return(0); + hashs1 = (unsigned int *) xmlMalloc(ns1->nodeNr * sizeof(unsigned int)); + if (hashs1 == NULL) { + xmlFree(values1); + return(0); + } + memset(values1, 0, ns1->nodeNr * sizeof(xmlChar *)); + values2 = (xmlChar **) xmlMalloc(ns2->nodeNr * sizeof(xmlChar *)); + if (values2 == NULL) { + xmlFree(hashs1); + xmlFree(values1); + return(0); + } + hashs2 = (unsigned int *) xmlMalloc(ns2->nodeNr * sizeof(unsigned int)); + if (hashs2 == NULL) { + xmlFree(hashs1); + xmlFree(values1); + xmlFree(values2); + return(0); + } + memset(values2, 0, ns2->nodeNr * sizeof(xmlChar *)); + for (i = 0;i < ns1->nodeNr;i++) { + hashs1[i] = xmlXPathNodeValHash(ns1->nodeTab[i]); + for (j = 0;j < ns2->nodeNr;j++) { + if (i == 0) + hashs2[j] = xmlXPathNodeValHash(ns2->nodeTab[j]); + if (hashs1[i] != hashs2[j]) { + if (neq) { + ret = 1; + break; + } + } + else { + if (values1[i] == NULL) + values1[i] = xmlNodeGetContent(ns1->nodeTab[i]); + if (values2[j] == NULL) + values2[j] = xmlNodeGetContent(ns2->nodeTab[j]); + ret = xmlStrEqual(values1[i], 
values2[j]) ^ neq; + if (ret) + break; + } + } + if (ret) + break; + } + for (i = 0;i < ns1->nodeNr;i++) + if (values1[i] != NULL) + xmlFree(values1[i]); + for (j = 0;j < ns2->nodeNr;j++) + if (values2[j] != NULL) + xmlFree(values2[j]); + xmlFree(values1); + xmlFree(values2); + xmlFree(hashs1); + xmlFree(hashs2); + return(ret); +} + +static int +xmlXPathEqualValuesCommon(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr arg1, xmlXPathObjectPtr arg2) { + int ret = 0; + /* + *At this point we are assured neither arg1 nor arg2 + *is a nodeset, so we can just pick the appropriate routine. + */ + switch (arg1->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_BOOLEAN: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_BOOLEAN: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: %d boolean %d \n", + arg1->boolval, arg2->boolval); +#endif + ret = (arg1->boolval == arg2->boolval); + break; + case XPATH_NUMBER: + ret = (arg1->boolval == + xmlXPathCastNumberToBoolean(arg2->floatval)); + break; + case XPATH_STRING: + if ((arg2->stringval == NULL) || + (arg2->stringval[0] == 0)) ret = 0; + else + ret = 1; + ret = (arg1->boolval == ret); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + break; + } + break; + case XPATH_NUMBER: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_BOOLEAN: + ret = (arg2->boolval== + xmlXPathCastNumberToBoolean(arg1->floatval)); + break; + case XPATH_STRING: + valuePush(ctxt, arg2); + xmlXPathNumberFunction(ctxt, 1); + arg2 = valuePop(ctxt); + /* no break on purpose */ + case XPATH_NUMBER: + /* Hand check 
NaN and Infinity equalities */ + if (xmlXPathIsNaN(arg1->floatval) || xmlXPathIsNaN(arg2->floatval)) { + ret = 0; + } else if (xmlXPathIsInf(arg1->floatval) == 1) { + if (xmlXPathIsInf(arg2->floatval) == 1) + ret = 1; + else + ret = 0; + } else if (xmlXPathIsInf(arg1->floatval) == -1) { + if (xmlXPathIsInf(arg2->floatval) == -1) + ret = 1; + else + ret = 0; + } else if (xmlXPathIsInf(arg2->floatval) == 1) { + if (xmlXPathIsInf(arg1->floatval) == 1) + ret = 1; + else + ret = 0; + } else if (xmlXPathIsInf(arg2->floatval) == -1) { + if (xmlXPathIsInf(arg1->floatval) == -1) + ret = 1; + else + ret = 0; + } else { + ret = (arg1->floatval == arg2->floatval); + } + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + break; + } + break; + case XPATH_STRING: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_BOOLEAN: + if ((arg1->stringval == NULL) || + (arg1->stringval[0] == 0)) ret = 0; + else + ret = 1; + ret = (arg2->boolval == ret); + break; + case XPATH_STRING: + ret = xmlStrEqual(arg1->stringval, arg2->stringval); + break; + case XPATH_NUMBER: + valuePush(ctxt, arg1); + xmlXPathNumberFunction(ctxt, 1); + arg1 = valuePop(ctxt); + /* Hand check NaN and Infinity equalities */ + if (xmlXPathIsNaN(arg1->floatval) || xmlXPathIsNaN(arg2->floatval)) { + ret = 0; + } else if (xmlXPathIsInf(arg1->floatval) == 1) { + if (xmlXPathIsInf(arg2->floatval) == 1) + ret = 1; + else + ret = 0; + } else if (xmlXPathIsInf(arg1->floatval) == -1) { + if (xmlXPathIsInf(arg2->floatval) == -1) + ret = 1; + else + ret = 0; + } else if (xmlXPathIsInf(arg2->floatval) == 1) { + if (xmlXPathIsInf(arg1->floatval) == 1) + ret = 1; + else + ret = 0; + } else if (xmlXPathIsInf(arg2->floatval) == -1) { + if (xmlXPathIsInf(arg1->floatval) == -1) + ret = 1; + else + ret = 0; + } else { + ret 
= (arg1->floatval == arg2->floatval); + } + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + break; + } + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + break; + } + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); +} + +/** + * xmlXPathEqualValues: + * @ctxt: the XPath Parser context + * + * Implement the equal operation on XPath objects content: @arg1 == @arg2 + * + * Returns 0 or 1 depending on the results of the test. + */ +int +xmlXPathEqualValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg1, arg2, argtmp; + int ret = 0; + + arg2 = valuePop(ctxt); + arg1 = valuePop(ctxt); + if ((arg1 == NULL) || (arg2 == NULL)) { + if (arg1 != NULL) + xmlXPathFreeObject(arg1); + else + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + + if (arg1 == arg2) { +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: by pointer\n"); +#endif + return(1); + } + + /* + *If either argument is a nodeset, it's a 'special case' + */ + if ((arg2->type == XPATH_NODESET) || (arg2->type == XPATH_XSLT_TREE) || + (arg1->type == XPATH_NODESET) || (arg1->type == XPATH_XSLT_TREE)) { + /* + *Hack it to assure arg1 is the nodeset + */ + if ((arg1->type != XPATH_NODESET) && (arg1->type != XPATH_XSLT_TREE)) { + argtmp = arg2; + arg2 = arg1; + arg1 = argtmp; + } + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathEqualNodeSets(arg1, arg2, 0); + break; + case XPATH_BOOLEAN: + if ((arg1->nodesetval == NULL) || + (arg1->nodesetval->nodeNr == 0)) ret = 0; + else + ret = 1; + ret = (ret == arg2->boolval); + break; + case XPATH_NUMBER: + ret = 
xmlXPathEqualNodeSetFloat(ctxt, arg1, arg2->floatval, 0); + break; + case XPATH_STRING: + ret = xmlXPathEqualNodeSetString(arg1, arg2->stringval, 0); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); + } + + return (xmlXPathEqualValuesCommon(ctxt, arg1, arg2)); +} + +/** + * xmlXPathNotEqualValues: + * @ctxt: the XPath Parser context + * + * Implement the equal operation on XPath objects content: @arg1 == @arg2 + * + * Returns 0 or 1 depending on the results of the test. + */ +int +xmlXPathNotEqualValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg1, arg2, argtmp; + int ret = 0; + + arg2 = valuePop(ctxt); + arg1 = valuePop(ctxt); + if ((arg1 == NULL) || (arg2 == NULL)) { + if (arg1 != NULL) + xmlXPathFreeObject(arg1); + else + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + + if (arg1 == arg2) { +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "NotEqual: by pointer\n"); +#endif + return(0); + } + + /* + *If either argument is a nodeset, it's a 'special case' + */ + if ((arg2->type == XPATH_NODESET) || (arg2->type == XPATH_XSLT_TREE) || + (arg1->type == XPATH_NODESET) || (arg1->type == XPATH_XSLT_TREE)) { + /* + *Hack it to assure arg1 is the nodeset + */ + if ((arg1->type != XPATH_NODESET) && (arg1->type != XPATH_XSLT_TREE)) { + argtmp = arg2; + arg2 = arg1; + arg1 = argtmp; + } + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "NotEqual: undefined\n"); +#endif + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathEqualNodeSets(arg1, arg2, 1); + break; + case XPATH_BOOLEAN: + if ((arg1->nodesetval == NULL) || + (arg1->nodesetval->nodeNr == 0)) ret = 0; + else + ret = 1; + ret = (ret != arg2->boolval); + break; + case XPATH_NUMBER: + ret = xmlXPathEqualNodeSetFloat(ctxt, arg1, arg2->floatval, 1); + break; + 
case XPATH_STRING: + ret = xmlXPathEqualNodeSetString(arg1, arg2->stringval,1); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); + } + + return (!xmlXPathEqualValuesCommon(ctxt, arg1, arg2)); +} + +/** + * xmlXPathCompareValues: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * + * Implement the compare operation on XPath objects: + * @arg1 < @arg2 (1, 1, ... + * @arg1 <= @arg2 (1, 0, ... + * @arg1 > @arg2 (0, 1, ... + * @arg1 >= @arg2 (0, 0, ... + * + * When neither object to be compared is a node-set and the operator is + * <=, <, >=, >, then the objects are compared by converted both objects + * to numbers and comparing the numbers according to IEEE 754. The < + * comparison will be true if and only if the first number is less than the + * second number. The <= comparison will be true if and only if the first + * number is less than or equal to the second number. The > comparison + * will be true if and only if the first number is greater than the second + * number. The >= comparison will be true if and only if the first number + * is greater than or equal to the second number. 
+ * + * Returns 1 if the comparison succeeded, 0 if it failed + */ +int +xmlXPathCompareValues(xmlXPathParserContextPtr ctxt, int inf, int strict) { + int ret = 0, arg1i = 0, arg2i = 0; + xmlXPathObjectPtr arg1, arg2; + + arg2 = valuePop(ctxt); + arg1 = valuePop(ctxt); + if ((arg1 == NULL) || (arg2 == NULL)) { + if (arg1 != NULL) + xmlXPathFreeObject(arg1); + else + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + + if ((arg2->type == XPATH_NODESET) || (arg2->type == XPATH_XSLT_TREE) || + (arg1->type == XPATH_NODESET) || (arg1->type == XPATH_XSLT_TREE)) { + if (((arg2->type == XPATH_NODESET) || (arg2->type == XPATH_XSLT_TREE)) && + ((arg1->type == XPATH_NODESET) || (arg1->type == XPATH_XSLT_TREE))){ + ret = xmlXPathCompareNodeSets(inf, strict, arg1, arg2); + } else { + if ((arg1->type == XPATH_NODESET) || (arg1->type == XPATH_XSLT_TREE)) { + ret = xmlXPathCompareNodeSetValue(ctxt, inf, strict, + arg1, arg2); + } else { + ret = xmlXPathCompareNodeSetValue(ctxt, !inf, strict, + arg2, arg1); + } + } + return(ret); + } + + if (arg1->type != XPATH_NUMBER) { + valuePush(ctxt, arg1); + xmlXPathNumberFunction(ctxt, 1); + arg1 = valuePop(ctxt); + } + if (arg1->type != XPATH_NUMBER) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + if (arg2->type != XPATH_NUMBER) { + valuePush(ctxt, arg2); + xmlXPathNumberFunction(ctxt, 1); + arg2 = valuePop(ctxt); + } + if (arg2->type != XPATH_NUMBER) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + /* + * Add tests for infinity and nan + * => feedback on 3.4 for Inf and NaN + */ + /* Hand check NaN and Infinity comparisons */ + if (xmlXPathIsNaN(arg1->floatval) || xmlXPathIsNaN(arg2->floatval)) { + ret=0; + } else { + arg1i=xmlXPathIsInf(arg1->floatval); + arg2i=xmlXPathIsInf(arg2->floatval); + if (inf && strict) { + if ((arg1i == -1 && arg2i != -1) || + (arg2i == 1 && arg1i != 1)) { + ret = 1; + } else if (arg1i == 0 && 
arg2i == 0) { + ret = (arg1->floatval < arg2->floatval); + } else { + ret = 0; + } + } + else if (inf && !strict) { + if (arg1i == -1 || arg2i == 1) { + ret = 1; + } else if (arg1i == 0 && arg2i == 0) { + ret = (arg1->floatval <= arg2->floatval); + } else { + ret = 0; + } + } + else if (!inf && strict) { + if ((arg1i == 1 && arg2i != 1) || + (arg2i == -1 && arg1i != -1)) { + ret = 1; + } else if (arg1i == 0 && arg2i == 0) { + ret = (arg1->floatval > arg2->floatval); + } else { + ret = 0; + } + } + else if (!inf && !strict) { + if (arg1i == 1 || arg2i == -1) { + ret = 1; + } else if (arg1i == 0 && arg2i == 0) { + ret = (arg1->floatval >= arg2->floatval); + } else { + ret = 0; + } + } + } + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); +} + +/** + * xmlXPathValueFlipSign: + * @ctxt: the XPath Parser context + * + * Implement the unary - operation on an XPath object + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathValueFlipSign(xmlXPathParserContextPtr ctxt) { + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + if (xmlXPathIsNaN(ctxt->value->floatval)) + ctxt->value->floatval=xmlXPathNAN; + else if (xmlXPathIsInf(ctxt->value->floatval) == 1) + ctxt->value->floatval=xmlXPathNINF; + else if (xmlXPathIsInf(ctxt->value->floatval) == -1) + ctxt->value->floatval=xmlXPathPINF; + else if (ctxt->value->floatval == 0) { + if (xmlXPathGetSign(ctxt->value->floatval) == 0) + ctxt->value->floatval = xmlXPathNZERO; + else + ctxt->value->floatval = 0; + } + else + ctxt->value->floatval = - ctxt->value->floatval; +} + +/** + * xmlXPathAddValues: + * @ctxt: the XPath Parser context + * + * Implement the add operation on XPath objects: + * The numeric operators convert their operands to numbers as if + * by calling the number function. 
+ */ +void +xmlXPathAddValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + arg = valuePop(ctxt); + if (arg == NULL) + XP_ERROR(XPATH_INVALID_OPERAND); + val = xmlXPathCastToNumber(arg); + xmlXPathFreeObject(arg); + + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + ctxt->value->floatval += val; +} + +/** + * xmlXPathSubValues: + * @ctxt: the XPath Parser context + * + * Implement the subtraction operation on XPath objects: + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathSubValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + arg = valuePop(ctxt); + if (arg == NULL) + XP_ERROR(XPATH_INVALID_OPERAND); + val = xmlXPathCastToNumber(arg); + xmlXPathFreeObject(arg); + + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + ctxt->value->floatval -= val; +} + +/** + * xmlXPathMultValues: + * @ctxt: the XPath Parser context + * + * Implement the multiply operation on XPath objects: + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathMultValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + arg = valuePop(ctxt); + if (arg == NULL) + XP_ERROR(XPATH_INVALID_OPERAND); + val = xmlXPathCastToNumber(arg); + xmlXPathFreeObject(arg); + + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + ctxt->value->floatval *= val; +} + +/** + * xmlXPathDivValues: + * @ctxt: the XPath Parser context + * + * Implement the div operation on XPath objects @arg1 / @arg2: + * The numeric operators convert their operands to numbers as if + * by calling the number function. 
+ */ +void +xmlXPathDivValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + arg = valuePop(ctxt); + if (arg == NULL) + XP_ERROR(XPATH_INVALID_OPERAND); + val = xmlXPathCastToNumber(arg); + xmlXPathFreeObject(arg); + + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + if (xmlXPathIsNaN(val) || xmlXPathIsNaN(ctxt->value->floatval)) + ctxt->value->floatval = xmlXPathNAN; + else if (val == 0 && xmlXPathGetSign(val) != 0) { + if (ctxt->value->floatval == 0) + ctxt->value->floatval = xmlXPathNAN; + else if (ctxt->value->floatval > 0) + ctxt->value->floatval = xmlXPathNINF; + else if (ctxt->value->floatval < 0) + ctxt->value->floatval = xmlXPathPINF; + } + else if (val == 0) { + if (ctxt->value->floatval == 0) + ctxt->value->floatval = xmlXPathNAN; + else if (ctxt->value->floatval > 0) + ctxt->value->floatval = xmlXPathPINF; + else if (ctxt->value->floatval < 0) + ctxt->value->floatval = xmlXPathNINF; + } else + ctxt->value->floatval /= val; +} + +/** + * xmlXPathModValues: + * @ctxt: the XPath Parser context + * + * Implement the mod operation on XPath objects: @arg1 / @arg2 + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathModValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double arg1, arg2; + + arg = valuePop(ctxt); + if (arg == NULL) + XP_ERROR(XPATH_INVALID_OPERAND); + arg2 = xmlXPathCastToNumber(arg); + xmlXPathFreeObject(arg); + + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + arg1 = ctxt->value->floatval; + if (arg2 == 0) + ctxt->value->floatval = xmlXPathNAN; + else { + ctxt->value->floatval = fmod(arg1, arg2); + } +} + +/************************************************************************ + * * + * The traversal functions * + * * + ************************************************************************/ + +/* + * A traversal function enumerates nodes along an axis. 
+ * Initially it must be called with NULL, and it indicates + * termination on the axis by returning NULL. + */ +typedef xmlNodePtr (*xmlXPathTraversalFunction) + (xmlXPathParserContextPtr ctxt, xmlNodePtr cur); + +/** + * xmlXPathNextSelf: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "self" direction + * The self axis contains just the context node itself + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) + return(ctxt->context->node); + return(NULL); +} + +/** + * xmlXPathNextChild: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "child" direction + * The child axis contains the children of the context node in document order. + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) { + if (ctxt->context->node == NULL) return(NULL); + switch (ctxt->context->node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + return(ctxt->context->node->children); + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return(((xmlDocPtr) ctxt->context->node)->children); + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_ATTRIBUTE_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(NULL); + } + return(NULL); + } + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) + return(NULL); + return(cur->next); +} + +/** + * 
xmlXPathNextDescendant: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "descendant" direction + * the descendant axis contains the descendants of the context node in document + * order; a descendant is a child or a child of a child and so on. + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) { + if (ctxt->context->node == NULL) + return(NULL); + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); + + if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc) + return(ctxt->context->doc->children); + return(ctxt->context->node->children); + } + + if (cur->children != NULL) { + /* + * Do not descend on entities declarations + */ + if (cur->children->type != XML_ENTITY_DECL) { + cur = cur->children; + /* + * Skip DTDs + */ + if (cur->type != XML_DTD_NODE) + return(cur); + } + } + + if (cur == ctxt->context->node) return(NULL); + + while (cur->next != NULL) { + cur = cur->next; + if ((cur->type != XML_ENTITY_DECL) && + (cur->type != XML_DTD_NODE)) + return(cur); + } + + do { + cur = cur->parent; + if (cur == NULL) return(NULL); + if (cur == ctxt->context->node) return(NULL); + if (cur->next != NULL) { + cur = cur->next; + return(cur); + } + } while (cur != NULL); + return(cur); +} + +/** + * xmlXPathNextDescendantOrSelf: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "descendant-or-self" direction + * the descendant-or-self axis contains the context node and the descendants + * of the context node in document order; thus the context node is the first + * node on the axis, and the first child of the context node is the second node + * on the axis + * + * Returns the next element following that axis + */ +xmlNodePtr 
+xmlXPathNextDescendantOrSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) { + if (ctxt->context->node == NULL) + return(NULL); + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); + return(ctxt->context->node); + } + + return(xmlXPathNextDescendant(ctxt, cur)); +} + +/** + * xmlXPathNextParent: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "parent" direction + * The parent axis contains the parent of the context node, if there is one. + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + /* + * the parent of an attribute or namespace node is the element + * to which the attribute or namespace node is attached + * Namespace handling !!! + */ + if (cur == NULL) { + if (ctxt->context->node == NULL) return(NULL); + switch (ctxt->context->node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + case XML_ENTITY_DECL: + if (ctxt->context->node->parent == NULL) + return((xmlNodePtr) ctxt->context->doc); + return(ctxt->context->node->parent); + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node; + + return(att->parent); + } + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return(NULL); + case XML_NAMESPACE_DECL: { + xmlNsPtr ns = (xmlNsPtr) ctxt->context->node; + + if ((ns->next != NULL) && + (ns->next->type != XML_NAMESPACE_DECL)) + return((xmlNodePtr) ns->next); + return(NULL); 
+ } + } + } + return(NULL); +} + +/** + * xmlXPathNextAncestor: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "ancestor" direction + * the ancestor axis contains the ancestors of the context node; the ancestors + * of the context node consist of the parent of context node and the parent's + * parent and so on; the nodes are ordered in reverse document order; thus the + * parent is the first node on the axis, and the parent's parent is the second + * node on the axis + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + /* + * the parent of an attribute or namespace node is the element + * to which the attribute or namespace node is attached + * !!!!!!!!!!!!! + */ + if (cur == NULL) { + if (ctxt->context->node == NULL) return(NULL); + switch (ctxt->context->node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NOTATION_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + if (ctxt->context->node->parent == NULL) + return((xmlNodePtr) ctxt->context->doc); + return(ctxt->context->node->parent); + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr tmp = (xmlAttrPtr) ctxt->context->node; + + return(tmp->parent); + } + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return(NULL); + case XML_NAMESPACE_DECL: { + xmlNsPtr ns = (xmlNsPtr) ctxt->context->node; + + if ((ns->next != NULL) && + (ns->next->type != XML_NAMESPACE_DECL)) + return((xmlNodePtr) ns->next); + /* Bad, how did that namespace ended-up there ? 
*/ + return(NULL); + } + } + return(NULL); + } + if (cur == ctxt->context->doc->children) + return((xmlNodePtr) ctxt->context->doc); + if (cur == (xmlNodePtr) ctxt->context->doc) + return(NULL); + switch (cur->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(cur->parent); + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node; + + return(att->parent); + } + case XML_NAMESPACE_DECL: { + xmlNsPtr ns = (xmlNsPtr) ctxt->context->node; + + if ((ns->next != NULL) && + (ns->next->type != XML_NAMESPACE_DECL)) + return((xmlNodePtr) ns->next); + /* Bad, how did that namespace ended-up there ? */ + return(NULL); + } + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + return(NULL); + } + return(NULL); +} + +/** + * xmlXPathNextAncestorOrSelf: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "ancestor-or-self" direction + * he ancestor-or-self axis contains the context node and ancestors of + * the context node in reverse document order; thus the context node is + * the first node on the axis, and the context node's parent the second; + * parent here is defined the same as with the parent axis. 
+ * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextAncestorOrSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) + return(ctxt->context->node); + return(xmlXPathNextAncestor(ctxt, cur)); +} + +/** + * xmlXPathNextFollowingSibling: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "following-sibling" direction + * The following-sibling axis contains the following siblings of the context + * node in document order. + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextFollowingSibling(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); + if (cur == (xmlNodePtr) ctxt->context->doc) + return(NULL); + if (cur == NULL) + return(ctxt->context->node->next); + return(cur->next); +} + +/** + * xmlXPathNextPrecedingSibling: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "preceding-sibling" direction + * The preceding-sibling axis contains the preceding siblings of the context + * node in reverse document order; the first preceding sibling is first on the + * axis; the sibling preceding that node is the second on the axis and so on. 
+ * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextPrecedingSibling(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); + if (cur == (xmlNodePtr) ctxt->context->doc) + return(NULL); + if (cur == NULL) + return(ctxt->context->node->prev); + if ((cur->prev != NULL) && (cur->prev->type == XML_DTD_NODE)) { + cur = cur->prev; + if (cur == NULL) + return(ctxt->context->node->prev); + } + return(cur->prev); +} + +/** + * xmlXPathNextFollowing: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "following" direction + * The following axis contains all nodes in the same document as the context + * node that are after the context node in document order, excluding any + * descendants and excluding attribute nodes and namespace nodes; the nodes + * are ordered in document order + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur != NULL && cur->children != NULL) + return cur->children ; + if (cur == NULL) cur = ctxt->context->node; + if (cur == NULL) return(NULL) ; /* ERROR */ + if (cur->next != NULL) return(cur->next) ; + do { + cur = cur->parent; + if (cur == NULL) return(NULL); + if (cur == (xmlNodePtr) ctxt->context->doc) return(NULL); + if (cur->next != NULL) return(cur->next); + } while (cur != NULL); + return(cur); +} + +/* + * xmlXPathIsAncestor: + * @ancestor: the ancestor node + * @node: the current node + * + * Check that @ancestor is a @node's ancestor + * + * returns 1 if @ancestor is a @node's ancestor, 0 otherwise. 
+ */ +static int +xmlXPathIsAncestor(xmlNodePtr ancestor, xmlNodePtr node) { + if ((ancestor == NULL) || (node == NULL)) return(0); + /* nodes need to be in the same document */ + if (ancestor->doc != node->doc) return(0); + /* avoid searching if ancestor or node is the root node */ + if (ancestor == (xmlNodePtr) node->doc) return(1); + if (node == (xmlNodePtr) ancestor->doc) return(0); + while (node->parent != NULL) { + if (node->parent == ancestor) + return(1); + node = node->parent; + } + return(0); +} + +/** + * xmlXPathNextPreceding: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "preceding" direction + * the preceding axis contains all nodes in the same document as the context + * node that are before the context node in document order, excluding any + * ancestors and excluding attribute nodes and namespace nodes; the nodes are + * ordered in reverse document order + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) +{ + if (cur == NULL) + cur = ctxt->context->node; + if (cur == NULL) + return (NULL); + if ((cur->prev != NULL) && (cur->prev->type == XML_DTD_NODE)) + cur = cur->prev; + do { + if (cur->prev != NULL) { + for (cur = cur->prev; cur->last != NULL; cur = cur->last) ; + return (cur); + } + + cur = cur->parent; + if (cur == NULL) + return (NULL); + if (cur == ctxt->context->doc->children) + return (NULL); + } while (xmlXPathIsAncestor(cur, ctxt->context->node)); + return (cur); +} + +/** + * xmlXPathNextPrecedingInternal: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "preceding" direction + * the preceding axis contains all nodes in the same document as the context + * node that are before the context node in document order, excluding any + * ancestors and excluding attribute nodes and namespace nodes; the nodes are + * ordered in 
reverse document order + * This is a faster implementation but internal only since it requires a + * state kept in the parser context: ctxt->ancestor. + * + * Returns the next element following that axis + */ +static xmlNodePtr +xmlXPathNextPrecedingInternal(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur) +{ + if (cur == NULL) { + cur = ctxt->context->node; + if (cur == NULL) + return (NULL); + ctxt->ancestor = cur->parent; + } + if ((cur->prev != NULL) && (cur->prev->type == XML_DTD_NODE)) + cur = cur->prev; + while (cur->prev == NULL) { + cur = cur->parent; + if (cur == NULL) + return (NULL); + if (cur == ctxt->context->doc->children) + return (NULL); + if (cur != ctxt->ancestor) + return (cur); + ctxt->ancestor = cur->parent; + } + cur = cur->prev; + while (cur->last != NULL) + cur = cur->last; + return (cur); +} + +/** + * xmlXPathNextNamespace: + * @ctxt: the XPath Parser context + * @cur: the current attribute in the traversal + * + * Traversal function for the "namespace" direction + * the namespace axis contains the namespace nodes of the context node; + * the order of nodes on this axis is implementation-defined; the axis will + * be empty unless the context node is an element + * + * We keep the XML namespace node at the end of the list. 
+ * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextNamespace(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (ctxt->context->node->type != XML_ELEMENT_NODE) return(NULL); + if (ctxt->context->tmpNsList == NULL && cur != (xmlNodePtr) xmlXPathXMLNamespace) { + if (ctxt->context->tmpNsList != NULL) + xmlFree(ctxt->context->tmpNsList); + ctxt->context->tmpNsList = + xmlGetNsList(ctxt->context->doc, ctxt->context->node); + ctxt->context->tmpNsNr = 0; + if (ctxt->context->tmpNsList != NULL) { + while (ctxt->context->tmpNsList[ctxt->context->tmpNsNr] != NULL) { + ctxt->context->tmpNsNr++; + } + } + return((xmlNodePtr) xmlXPathXMLNamespace); + } + if (ctxt->context->tmpNsNr > 0) { + return (xmlNodePtr)ctxt->context->tmpNsList[--ctxt->context->tmpNsNr]; + } else { + if (ctxt->context->tmpNsList != NULL) + xmlFree(ctxt->context->tmpNsList); + ctxt->context->tmpNsList = NULL; + return(NULL); + } +} + +/** + * xmlXPathNextAttribute: + * @ctxt: the XPath Parser context + * @cur: the current attribute in the traversal + * + * Traversal function for the "attribute" direction + * TODO: support DTD inherited default attributes + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextAttribute(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (ctxt->context->node == NULL) + return(NULL); + if (ctxt->context->node->type != XML_ELEMENT_NODE) + return(NULL); + if (cur == NULL) { + if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc) + return(NULL); + return((xmlNodePtr)ctxt->context->node->properties); + } + return((xmlNodePtr)cur->next); +} + +/************************************************************************ + * * + * NodeTest Functions * + * * + ************************************************************************/ + +#define IS_FUNCTION 200 + + +/************************************************************************ + * * + * Implicit tree core function library * + * * + 
************************************************************************/ + +/** + * xmlXPathRoot: + * @ctxt: the XPath Parser context + * + * Initialize the context to the root of the document + */ +void +xmlXPathRoot(xmlXPathParserContextPtr ctxt) { + ctxt->context->node = (xmlNodePtr) ctxt->context->doc; + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); +} + +/************************************************************************ + * * + * The explicit core function library * + *http://www.w3.org/Style/XSL/Group/1999/07/xpath-19990705.html#corelib * + * * + ************************************************************************/ + + +/** + * xmlXPathLastFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the last() XPath function + * number last() + * The last function returns the number of nodes in the context node list. + */ +void +xmlXPathLastFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + if (ctxt->context->contextSize >= 0) { + valuePush(ctxt, xmlXPathNewFloat((double) ctxt->context->contextSize)); +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "last() : %d\n", ctxt->context->contextSize); +#endif + } else { + XP_ERROR(XPATH_INVALID_CTXT_SIZE); + } +} + +/** + * xmlXPathPositionFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the position() XPath function + * number position() + * The position function returns the position of the context node in the + * context node list. The first position is 1, and so the last position + * will be equal to last(). 
+ */ +void +xmlXPathPositionFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + if (ctxt->context->proximityPosition >= 0) { + valuePush(ctxt, + xmlXPathNewFloat((double) ctxt->context->proximityPosition)); +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, "position() : %d\n", + ctxt->context->proximityPosition); +#endif + } else { + XP_ERROR(XPATH_INVALID_CTXT_POSITION); + } +} + +/** + * xmlXPathCountFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the count() XPath function + * number count(node-set) + */ +void +xmlXPathCountFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if ((cur == NULL) || (cur->nodesetval == NULL)) + valuePush(ctxt, xmlXPathNewFloat((double) 0)); + else if ((cur->type == XPATH_NODESET) || (cur->type == XPATH_XSLT_TREE)) { + valuePush(ctxt, xmlXPathNewFloat((double) cur->nodesetval->nodeNr)); + } else { + if ((cur->nodesetval->nodeNr != 1) || + (cur->nodesetval->nodeTab == NULL)) { + valuePush(ctxt, xmlXPathNewFloat((double) 0)); + } else { + xmlNodePtr tmp; + int i = 0; + + tmp = cur->nodesetval->nodeTab[0]; + if (tmp != NULL) { + tmp = tmp->children; + while (tmp != NULL) { + tmp = tmp->next; + i++; + } + } + valuePush(ctxt, xmlXPathNewFloat((double) i)); + } + } + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathGetElementsByIds: + * @doc: the document + * @ids: a whitespace separated list of IDs + * + * Selects elements by their unique ID. + * + * Returns a node-set of selected elements. 
+ */ +static xmlNodeSetPtr +xmlXPathGetElementsByIds (xmlDocPtr doc, const xmlChar *ids) { + xmlNodeSetPtr ret; + const xmlChar *cur = ids; + xmlChar *ID; + xmlAttrPtr attr; + xmlNodePtr elem = NULL; + + ret = xmlXPathNodeSetCreate(NULL); + + while (IS_BLANK(*cur)) cur++; + while (*cur != 0) { + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur == '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + + if ((!IS_BLANK(*cur)) && (*cur != 0)) break; + + ID = xmlStrndup(ids, cur - ids); + attr = xmlGetID(doc, ID); + if (attr != NULL) { + elem = attr->parent; + xmlXPathNodeSetAdd(ret, elem); + } + if (ID != NULL) + xmlFree(ID); + + while (IS_BLANK(*cur)) cur++; + ids = cur; + } + return(ret); +} + +/** + * xmlXPathIdFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the id() XPath function + * node-set id(object) + * The id function selects elements by their unique ID + * (see [5.2.1 Unique IDs]). When the argument to id is of type node-set, + * then the result is the union of the result of applying id to the + * string value of each of the nodes in the argument node-set. When the + * argument to id is of any other type, the argument is converted to a + * string as if by a call to the string function; the string is split + * into a whitespace-separated list of tokens (whitespace is any sequence + * of characters matching the production S); the result is a node-set + * containing the elements in the same document as the context node that + * have a unique ID equal to any of the tokens in the list. 
+ */ +void +xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlChar *tokens; + xmlNodeSetPtr ret; + xmlXPathObjectPtr obj; + + CHECK_ARITY(1); + obj = valuePop(ctxt); + if (obj == NULL) XP_ERROR(XPATH_INVALID_OPERAND); + if ((obj->type == XPATH_NODESET) || (obj->type == XPATH_XSLT_TREE)) { + xmlNodeSetPtr ns; + int i; + + ret = xmlXPathNodeSetCreate(NULL); + + if (obj->nodesetval != NULL) { + for (i = 0; i < obj->nodesetval->nodeNr; i++) { + tokens = + xmlXPathCastNodeToString(obj->nodesetval->nodeTab[i]); + ns = xmlXPathGetElementsByIds(ctxt->context->doc, tokens); + ret = xmlXPathNodeSetMerge(ret, ns); + xmlXPathFreeNodeSet(ns); + if (tokens != NULL) + xmlFree(tokens); + } + } + + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathWrapNodeSet(ret)); + return; + } + obj = xmlXPathConvertString(obj); + + ret = xmlXPathGetElementsByIds(ctxt->context->doc, obj->stringval); + valuePush(ctxt, xmlXPathWrapNodeSet(ret)); + + xmlXPathFreeObject(obj); + return; +} + +/** + * xmlXPathLocalNameFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the local-name() XPath function + * string local-name(node-set?) + * The local-name function returns a string containing the local part + * of the name of the node in the argument node-set that is first in + * document order. If the node-set is empty or the first node has no + * name, an empty string is returned. If the argument is omitted it + * defaults to the context node. 
+ */ +void +xmlXPathLocalNameFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + nargs = 1; + } + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if ((cur->nodesetval == NULL) || (cur->nodesetval->nodeNr == 0)) { + valuePush(ctxt, xmlXPathNewCString("")); + } else { + int i = 0; /* Should be first in document order !!!!! */ + switch (cur->nodesetval->nodeTab[i]->type) { + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + case XML_PI_NODE: + valuePush(ctxt, + xmlXPathNewString(cur->nodesetval->nodeTab[i]->name)); + break; + case XML_NAMESPACE_DECL: + valuePush(ctxt, xmlXPathNewString( + ((xmlNsPtr)cur->nodesetval->nodeTab[i])->prefix)); + break; + default: + valuePush(ctxt, xmlXPathNewCString("")); + } + } + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathNamespaceURIFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the namespace-uri() XPath function + * string namespace-uri(node-set?) + * The namespace-uri function returns a string containing the + * namespace URI of the expanded name of the node in the argument + * node-set that is first in document order. If the node-set is empty, + * the first node has no name, or the expanded name has no namespace + * URI, an empty string is returned. If the argument is omitted it + * defaults to the context node. 
+ */ +void +xmlXPathNamespaceURIFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + nargs = 1; + } + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if ((cur->nodesetval == NULL) || (cur->nodesetval->nodeNr == 0)) { + valuePush(ctxt, xmlXPathNewCString("")); + } else { + int i = 0; /* Should be first in document order !!!!! */ + switch (cur->nodesetval->nodeTab[i]->type) { + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + if (cur->nodesetval->nodeTab[i]->ns == NULL) + valuePush(ctxt, xmlXPathNewCString("")); + else + valuePush(ctxt, xmlXPathNewString( + cur->nodesetval->nodeTab[i]->ns->href)); + break; + default: + valuePush(ctxt, xmlXPathNewCString("")); + } + } + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathNameFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the name() XPath function + * string name(node-set?) + * The name function returns a string containing a QName representing + * the name of the node in the argument node-set that is first in document + * order. The QName must represent the name with respect to the namespace + * declarations in effect on the node whose name is being represented. + * Typically, this will be the form in which the name occurred in the XML + * source. This need not be the case if there are namespace declarations + * in effect on the node that associate multiple prefixes with the same + * namespace. However, an implementation may include information about + * the original prefix in its representation of nodes; in this case, an + * implementation can ensure that the returned string is always the same + * as the QName used in the XML source. If the argument it omitted it + * defaults to the context node. 
+ * Libxml keep the original prefix so the "real qualified name" used is + * returned. + */ +static void +xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs) +{ + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + nargs = 1; + } + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if ((cur->nodesetval == NULL) || (cur->nodesetval->nodeNr == 0)) { + valuePush(ctxt, xmlXPathNewCString("")); + } else { + int i = 0; /* Should be first in document order !!!!! */ + + switch (cur->nodesetval->nodeTab[i]->type) { + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + if ((cur->nodesetval->nodeTab[i]->ns == NULL) || + (cur->nodesetval->nodeTab[i]->ns->prefix == NULL)) + valuePush(ctxt, + xmlXPathNewString(cur->nodesetval-> + nodeTab[i]->name)); + + else { + char name[2000]; + + snprintf(name, sizeof(name), "%s:%s", + (char *) cur->nodesetval->nodeTab[i]->ns-> + prefix, + (char *) cur->nodesetval->nodeTab[i]->name); + name[sizeof(name) - 1] = 0; + valuePush(ctxt, xmlXPathNewCString(name)); + } + break; + default: + valuePush(ctxt, + xmlXPathNewNodeSet(cur->nodesetval->nodeTab[i])); + xmlXPathLocalNameFunction(ctxt, 1); + } + } + xmlXPathFreeObject(cur); +} + + +/** + * xmlXPathStringFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the string() XPath function + * string string(object?) + * he string function converts an object to a string as follows: + * - A node-set is converted to a string by returning the value of + * the node in the node-set that is first in document order. + * If the node-set is empty, an empty string is returned. 
+ * - A number is converted to a string as follows + * + NaN is converted to the string NaN + * + positive zero is converted to the string 0 + * + negative zero is converted to the string 0 + * + positive infinity is converted to the string Infinity + * + negative infinity is converted to the string -Infinity + * + if the number is an integer, the number is represented in + * decimal form as a Number with no decimal point and no leading + * zeros, preceded by a minus sign (-) if the number is negative + * + otherwise, the number is represented in decimal form as a + * Number including a decimal point with at least one digit + * before the decimal point and at least one digit after the + * decimal point, preceded by a minus sign (-) if the number + * is negative; there must be no leading zeros before the decimal + * point apart possibly from the one required digit immediately + * before the decimal point; beyond the one required digit + * after the decimal point there must be as many, but only as + * many, more digits as are needed to uniquely distinguish the + * number from all other IEEE 754 numeric values. + * - The boolean false value is converted to the string false. + * The boolean true value is converted to the string true. + * + * If the argument is omitted, it defaults to a node-set with the + * context node as its only member. + */ +void +xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, + xmlXPathWrapString( + xmlXPathCastNodeToString(ctxt->context->node))); + return; + } + + CHECK_ARITY(1); + cur = valuePop(ctxt); + if (cur == NULL) XP_ERROR(XPATH_INVALID_OPERAND); + cur = xmlXPathConvertString(cur); + valuePush(ctxt, cur); +} + +/** + * xmlXPathStringLengthFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the string-length() XPath function + * number string-length(string?) 
+ * The string-length returns the number of characters in the string + * (see [3.6 Strings]). If the argument is omitted, it defaults to + * the context node converted to a string, in other words the value + * of the context node. + */ +void +xmlXPathStringLengthFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + if (ctxt->context->node == NULL) { + valuePush(ctxt, xmlXPathNewFloat(0)); + } else { + xmlChar *content; + + content = xmlXPathCastNodeToString(ctxt->context->node); + valuePush(ctxt, xmlXPathNewFloat(xmlUTF8Strlen(content))); + xmlFree(content); + } + return; + } + CHECK_ARITY(1); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + cur = valuePop(ctxt); + valuePush(ctxt, xmlXPathNewFloat(xmlUTF8Strlen(cur->stringval))); + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathConcatFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the concat() XPath function + * string concat(string, string, string*) + * The concat function returns the concatenation of its arguments. 
+ */ +void +xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur, newobj; + xmlChar *tmp; + + if (nargs < 2) { + CHECK_ARITY(2); + } + + CAST_TO_STRING; + cur = valuePop(ctxt); + if ((cur == NULL) || (cur->type != XPATH_STRING)) { + xmlXPathFreeObject(cur); + return; + } + nargs--; + + while (nargs > 0) { + CAST_TO_STRING; + newobj = valuePop(ctxt); + if ((newobj == NULL) || (newobj->type != XPATH_STRING)) { + xmlXPathFreeObject(newobj); + xmlXPathFreeObject(cur); + XP_ERROR(XPATH_INVALID_TYPE); + } + tmp = xmlStrcat(newobj->stringval, cur->stringval); + newobj->stringval = cur->stringval; + cur->stringval = tmp; + + xmlXPathFreeObject(newobj); + nargs--; + } + valuePush(ctxt, cur); +} + +/** + * xmlXPathContainsFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the contains() XPath function + * boolean contains(string, string) + * The contains function returns true if the first argument string + * contains the second argument string, and otherwise returns false. 
+ */ +void +xmlXPathContainsFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr hay, needle; + + CHECK_ARITY(2); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + needle = valuePop(ctxt); + CAST_TO_STRING; + hay = valuePop(ctxt); + if ((hay == NULL) || (hay->type != XPATH_STRING)) { + xmlXPathFreeObject(hay); + xmlXPathFreeObject(needle); + XP_ERROR(XPATH_INVALID_TYPE); + } + if (xmlStrstr(hay->stringval, needle->stringval)) + valuePush(ctxt, xmlXPathNewBoolean(1)); + else + valuePush(ctxt, xmlXPathNewBoolean(0)); + xmlXPathFreeObject(hay); + xmlXPathFreeObject(needle); +} + +/** + * xmlXPathStartsWithFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the starts-with() XPath function + * boolean starts-with(string, string) + * The starts-with function returns true if the first argument string + * starts with the second argument string, and otherwise returns false. + */ +void +xmlXPathStartsWithFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr hay, needle; + int n; + + CHECK_ARITY(2); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + needle = valuePop(ctxt); + CAST_TO_STRING; + hay = valuePop(ctxt); + if ((hay == NULL) || (hay->type != XPATH_STRING)) { + xmlXPathFreeObject(hay); + xmlXPathFreeObject(needle); + XP_ERROR(XPATH_INVALID_TYPE); + } + n = xmlStrlen(needle->stringval); + if (xmlStrncmp(hay->stringval, needle->stringval, n)) + valuePush(ctxt, xmlXPathNewBoolean(0)); + else + valuePush(ctxt, xmlXPathNewBoolean(1)); + xmlXPathFreeObject(hay); + xmlXPathFreeObject(needle); +} + +/** + * xmlXPathSubstringFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the substring() XPath function + * string substring(string, number, number?) + * The substring function returns the substring of the first argument + * starting at the position specified in the second argument with + * length specified in the third argument. 
For example,
 * substring("12345",2,3) returns "234". If the third argument is not
 * specified, it returns the substring starting at the position specified
 * in the second argument and continuing to the end of the string. For
 * example, substring("12345",2) returns "2345".  More precisely, each
 * character in the string (see [3.6 Strings]) is considered to have a
 * numeric position: the position of the first character is 1, the position
 * of the second character is 2 and so on. The returned substring contains
 * those characters for which the position of the character is greater than
 * or equal to the second argument and, if the third argument is specified,
 * less than the sum of the second and third arguments; the comparisons
 * and addition used for the above follow the standard IEEE 754 rules. Thus:
 *  - substring("12345", 1.5, 2.6) returns "234"
 *  - substring("12345", 0, 3) returns "12"
 *  - substring("12345", 0 div 0, 3) returns ""
 *  - substring("12345", 1, 0 div 0) returns ""
 *  - substring("12345", -42, 1 div 0) returns "12345"
 *  - substring("12345", -1 div 0, 1 div 0) returns ""
 */
void
xmlXPathSubstringFunction(xmlXPathParserContextPtr ctxt, int nargs) {
    xmlXPathObjectPtr str, start, len;
    double le=0, in;
    int i, l, m;
    xmlChar *ret;

    /* accept two or three arguments only */
    if (nargs < 2) {
	CHECK_ARITY(2);
    }
    if (nargs > 3) {
	CHECK_ARITY(3);
    }
    /*
     * take care of the optional third (length) argument, which sits on
     * top of the stack
     */
    if (nargs == 3) {
	CAST_TO_NUMBER;
	CHECK_TYPE(XPATH_NUMBER);
	len = valuePop(ctxt);
	le = len->floatval;
	xmlXPathFreeObject(len);
    }

    /* second argument: the start position */
    CAST_TO_NUMBER;
    CHECK_TYPE(XPATH_NUMBER);
    start = valuePop(ctxt);
    in = start->floatval;
    xmlXPathFreeObject(start);
    /* first argument: the string to cut */
    CAST_TO_STRING;
    CHECK_TYPE(XPATH_STRING);
    str = valuePop(ctxt);
    /* m is the length of the string as counted by xmlUTF8Strlen */
    m = xmlUTF8Strlen((const unsigned char *)str->stringval);

    /*
     * If no length was given, use the whole string length and clamp
     * the start position to the first character
     */
    if (nargs != 3) {
	le = (double)m;
	if (in < 1.0)
	    in = 1.0;
    }

    /* Need to check for the special cases where either
     * the index is NaN, the length is NaN, or both
     * arguments are infinity (relying on Inf + -Inf = NaN)
     */
    if (!xmlXPathIsNaN(in + le) && !xmlXPathIsInf(in)) {
        /*
         * To meet the requirements of the spec, the arguments
	 * must be converted to integer format before
	 * initial index calculations are done
         *
         * First we go to integer form, rounding up
	 * and checking for special cases
         */
        /*
         * NOTE(review): a finite start or length beyond the range of int
         * is converted with a plain cast here; confirm callers cannot
         * pass such values.
         */
        i = (int) in;
        if (((double)i)+0.5 <= in) i++;

	if (xmlXPathIsInf(le) == 1) {
	    /* infinite length: take everything up to the end */
	    l = m;
	    if (i < 1)
		i = 1;
	}
	else if (xmlXPathIsInf(le) == -1 || le < 0.0)
	    l = 0;
	else {
	    l = (int) le;
	    if (((double)l)+0.5 <= le) l++;
	}

	/* Now we normalize indices: i becomes 0-based, l the end offset */
        i -= 1;
        l += i;
        if (i < 0)
            i = 0;
        if (l > m)
            l = m;

        /* number of chars to copy */
        l -= i;

        ret = xmlUTF8Strsub(str->stringval, i, l);
    }
    else {
        ret = NULL;
    }

    /* a missing result is reported as the empty string */
    if (ret == NULL)
	valuePush(ctxt, xmlXPathNewCString(""));
    else {
	valuePush(ctxt, xmlXPathNewString(ret));
	xmlFree(ret);
    }

    xmlXPathFreeObject(str);
}

/**
 * xmlXPathSubstringBeforeFunction:
 * @ctxt:  the XPath Parser context
 * @nargs:  the number of arguments
 *
 * Implement the substring-before() XPath function
 *    string substring-before(string, string)
 * The substring-before function returns the substring of the first
 * argument string that precedes the first occurrence of the second
 * argument string in the first argument string, or the empty string
 * if the first argument string does not contain the second argument
 * string. For example, substring-before("1999/04/01","/") returns 1999.
+ */ +void +xmlXPathSubstringBeforeFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr str; + xmlXPathObjectPtr find; + xmlBufferPtr target; + const xmlChar *point; + int offset; + + CHECK_ARITY(2); + CAST_TO_STRING; + find = valuePop(ctxt); + CAST_TO_STRING; + str = valuePop(ctxt); + + target = xmlBufferCreate(); + if (target) { + point = xmlStrstr(str->stringval, find->stringval); + if (point) { + offset = (int)(point - str->stringval); + xmlBufferAdd(target, str->stringval, offset); + } + valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target))); + xmlBufferFree(target); + } + + xmlXPathFreeObject(str); + xmlXPathFreeObject(find); +} + +/** + * xmlXPathSubstringAfterFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the substring-after() XPath function + * string substring-after(string, string) + * The substring-after function returns the substring of the first + * argument string that follows the first occurrence of the second + * argument string in the first argument string, or the empty stringi + * if the first argument string does not contain the second argument + * string. For example, substring-after("1999/04/01","/") returns 04/01, + * and substring-after("1999/04/01","19") returns 99/04/01. 
+ */ +void +xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr str; + xmlXPathObjectPtr find; + xmlBufferPtr target; + const xmlChar *point; + int offset; + + CHECK_ARITY(2); + CAST_TO_STRING; + find = valuePop(ctxt); + CAST_TO_STRING; + str = valuePop(ctxt); + + target = xmlBufferCreate(); + if (target) { + point = xmlStrstr(str->stringval, find->stringval); + if (point) { + offset = (int)(point - str->stringval) + xmlStrlen(find->stringval); + xmlBufferAdd(target, &str->stringval[offset], + xmlStrlen(str->stringval) - offset); + } + valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target))); + xmlBufferFree(target); + } + + xmlXPathFreeObject(str); + xmlXPathFreeObject(find); +} + +/** + * xmlXPathNormalizeFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the normalize-space() XPath function + * string normalize-space(string?) + * The normalize-space function returns the argument string with white + * space normalized by stripping leading and trailing whitespace + * and replacing sequences of whitespace characters by a single + * space. Whitespace characters are the same allowed by the S production + * in XML. If the argument is omitted, it defaults to the context + * node converted to a string, in other words the value of the context node. 
+ */ +void +xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr obj = NULL; + xmlChar *source = NULL; + xmlBufferPtr target; + xmlChar blank; + + if (nargs == 0) { + /* Use current context node */ + valuePush(ctxt, + xmlXPathWrapString( + xmlXPathCastNodeToString(ctxt->context->node))); + nargs = 1; + } + + CHECK_ARITY(1); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + obj = valuePop(ctxt); + source = obj->stringval; + + target = xmlBufferCreate(); + if (target && source) { + + /* Skip leading whitespaces */ + while (IS_BLANK(*source)) + source++; + + /* Collapse intermediate whitespaces, and skip trailing whitespaces */ + blank = 0; + while (*source) { + if (IS_BLANK(*source)) { + blank = 0x20; + } else { + if (blank) { + xmlBufferAdd(target, &blank, 1); + blank = 0; + } + xmlBufferAdd(target, source, 1); + } + source++; + } + + valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target))); + xmlBufferFree(target); + } + xmlXPathFreeObject(obj); +} + +/** + * xmlXPathTranslateFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the translate() XPath function + * string translate(string, string, string) + * The translate function returns the first argument string with + * occurrences of characters in the second argument string replaced + * by the character at the corresponding position in the third argument + * string. For example, translate("bar","abc","ABC") returns the string + * BAr. If there is a character in the second argument string with no + * character at a corresponding position in the third argument string + * (because the second argument string is longer than the third argument + * string), then occurrences of that character in the first argument + * string are removed. For example, translate("--aaa--","abc-","ABC") + * returns "AAA". If a character occurs more than once in second + * argument string, then the first occurrence determines the replacement + * character. 
If the third argument string is longer than the second
 * argument string, then excess characters are ignored.
 */
void
xmlXPathTranslateFunction(xmlXPathParserContextPtr ctxt, int nargs) {
    xmlXPathObjectPtr str;
    xmlXPathObjectPtr from;
    xmlXPathObjectPtr to;
    xmlBufferPtr target;
    int offset, max;
    xmlChar ch;
    xmlChar *point;
    xmlChar *cptr;

    CHECK_ARITY(3);

    /* arguments are popped last to first: to, from, then the input */
    CAST_TO_STRING;
    to = valuePop(ctxt);
    CAST_TO_STRING;
    from = valuePop(ctxt);
    CAST_TO_STRING;
    str = valuePop(ctxt);

    target = xmlBufferCreate();
    if (target) {
	/* only "from" positions below max have a replacement in "to" */
	max = xmlUTF8Strlen(to->stringval);
	for (cptr = str->stringval; (ch=*cptr); ) {
	    offset = xmlUTF8Strloc(from->stringval, cptr);
	    if (offset >= 0) {
		/* found in "from": replace it, or drop it when "to" is shorter */
		if (offset < max) {
		    point = xmlUTF8Strpos(to->stringval, offset);
		    if (point)
			xmlBufferAdd(target, point, xmlUTF8Strsize(point, 1));
		}
	    } else
		/* not listed in "from": copy the character unchanged */
		xmlBufferAdd(target, cptr, xmlUTF8Strsize(cptr, 1));

	    /* Step to next character in input */
	    cptr++;
	    if ( ch & 0x80 ) {
		/* if not simple ascii, verify proper format */
		if ( (ch & 0xc0) != 0xc0 ) {
		    xmlGenericError(xmlGenericErrorContext,
			"xmlXPathTranslateFunction: Invalid UTF8 string\n");
		    break;
		}
		/* then skip over remaining bytes for this char */
		while ( (ch <<= 1) & 0x80 )
		    if ( (*cptr++ & 0xc0) != 0x80 ) {
			xmlGenericError(xmlGenericErrorContext,
			    "xmlXPathTranslateFunction: Invalid UTF8 string\n");
			break;
		    }
		if (ch & 0x80) /* must have had error encountered */
		    break;
	    }
	}
    }
    /* whatever was collected so far becomes the result */
    valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target)));
    xmlBufferFree(target);
    xmlXPathFreeObject(str);
    xmlXPathFreeObject(from);
    xmlXPathFreeObject(to);
}

/**
 * xmlXPathBooleanFunction:
 * @ctxt:  the XPath Parser context
 * @nargs:  the number of arguments
 *
 * Implement the boolean() XPath function
 *    boolean boolean(object)
 * The boolean function converts its argument to a boolean as follows:
 *    - a number is true if and only if it is neither positive or
 *      negative zero nor
NaN + * - a node-set is true if and only if it is non-empty + * - a string is true if and only if its length is non-zero + */ +void +xmlXPathBooleanFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + CHECK_ARITY(1); + cur = valuePop(ctxt); + if (cur == NULL) XP_ERROR(XPATH_INVALID_OPERAND); + cur = xmlXPathConvertBoolean(cur); + valuePush(ctxt, cur); +} + +/** + * xmlXPathNotFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the not() XPath function + * boolean not(boolean) + * The not function returns true if its argument is false, + * and false otherwise. + */ +void +xmlXPathNotFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(1); + CAST_TO_BOOLEAN; + CHECK_TYPE(XPATH_BOOLEAN); + ctxt->value->boolval = ! ctxt->value->boolval; +} + +/** + * xmlXPathTrueFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the true() XPath function + * boolean true() + */ +void +xmlXPathTrueFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + valuePush(ctxt, xmlXPathNewBoolean(1)); +} + +/** + * xmlXPathFalseFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the false() XPath function + * boolean false() + */ +void +xmlXPathFalseFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + valuePush(ctxt, xmlXPathNewBoolean(0)); +} + +/** + * xmlXPathLangFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the lang() XPath function + * boolean lang(string) + * The lang function returns true or false depending on whether the + * language of the context node as specified by xml:lang attributes + * is the same as or is a sublanguage of the language specified by + * the argument string. 
The language of the context node is determined + * by the value of the xml:lang attribute on the context node, or, if + * the context node has no xml:lang attribute, by the value of the + * xml:lang attribute on the nearest ancestor of the context node that + * has an xml:lang attribute. If there is no such attribute, then lang + * returns false. If there is such an attribute, then lang returns + * true if the attribute value is equal to the argument ignoring case, + * or if there is some suffix starting with - such that the attribute + * value is equal to the argument ignoring that suffix of the attribute + * value and ignoring case. + */ +void +xmlXPathLangFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr val; + const xmlChar *theLang; + const xmlChar *lang; + int ret = 0; + int i; + + CHECK_ARITY(1); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + val = valuePop(ctxt); + lang = val->stringval; + theLang = xmlNodeGetLang(ctxt->context->node); + if ((theLang != NULL) && (lang != NULL)) { + for (i = 0;lang[i] != 0;i++) + if (toupper(lang[i]) != toupper(theLang[i])) + goto not_equal; + ret = 1; + } +not_equal: + xmlXPathFreeObject(val); + valuePush(ctxt, xmlXPathNewBoolean(ret)); +} + +/** + * xmlXPathNumberFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the number() XPath function + * number number(object?) 
+ */ +void +xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + double res; + + if (nargs == 0) { + if (ctxt->context->node == NULL) { + valuePush(ctxt, xmlXPathNewFloat(0.0)); + } else { + xmlChar* content = xmlNodeGetContent(ctxt->context->node); + + res = xmlXPathStringEvalNumber(content); + valuePush(ctxt, xmlXPathNewFloat(res)); + xmlFree(content); + } + return; + } + + CHECK_ARITY(1); + cur = valuePop(ctxt); + cur = xmlXPathConvertNumber(cur); + valuePush(ctxt, cur); +} + +/** + * xmlXPathSumFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the sum() XPath function + * number sum(node-set) + * The sum function returns the sum of the values of the nodes in + * the argument node-set. + */ +void +xmlXPathSumFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + int i; + double res = 0.0; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if ((cur->nodesetval == NULL) || (cur->nodesetval->nodeNr == 0)) { + valuePush(ctxt, xmlXPathNewFloat(0.0)); + } else { + for (i = 0; i < cur->nodesetval->nodeNr; i++) { + res += xmlXPathCastNodeToNumber(cur->nodesetval->nodeTab[i]); + } + valuePush(ctxt, xmlXPathNewFloat(res)); + } + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathFloorFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the floor() XPath function + * number floor(number) + * The floor function returns the largest (closest to positive infinity) + * number that is not greater than the argument and that is an integer. 
+ */ +void +xmlXPathFloorFunction(xmlXPathParserContextPtr ctxt, int nargs) { + double f; + + CHECK_ARITY(1); + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + + f = (double)((int) ctxt->value->floatval); + if (f != ctxt->value->floatval) { + if (ctxt->value->floatval > 0) + ctxt->value->floatval = f; + else + ctxt->value->floatval = f - 1; + } +} + +/** + * xmlXPathCeilingFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the ceiling() XPath function + * number ceiling(number) + * The ceiling function returns the smallest (closest to negative infinity) + * number that is not less than the argument and that is an integer. + */ +void +xmlXPathCeilingFunction(xmlXPathParserContextPtr ctxt, int nargs) { + double f; + + CHECK_ARITY(1); + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + +#if 0 + ctxt->value->floatval = ceil(ctxt->value->floatval); +#else + f = (double)((int) ctxt->value->floatval); + if (f != ctxt->value->floatval) { + if (ctxt->value->floatval > 0) + ctxt->value->floatval = f + 1; + else { + if (ctxt->value->floatval < 0 && f == 0) + ctxt->value->floatval = xmlXPathNZERO; + else + ctxt->value->floatval = f; + } + + } +#endif +} + +/** + * xmlXPathRoundFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the round() XPath function + * number round(number) + * The round function returns the number that is closest to the + * argument and that is an integer. If there are two such numbers, + * then the one that is even is returned. 
+ */ +void +xmlXPathRoundFunction(xmlXPathParserContextPtr ctxt, int nargs) { + double f; + + CHECK_ARITY(1); + CAST_TO_NUMBER; + CHECK_TYPE(XPATH_NUMBER); + + if ((xmlXPathIsNaN(ctxt->value->floatval)) || + (xmlXPathIsInf(ctxt->value->floatval) == 1) || + (xmlXPathIsInf(ctxt->value->floatval) == -1) || + (ctxt->value->floatval == 0.0)) + return; + + f = (double)((int) ctxt->value->floatval); + if (ctxt->value->floatval < 0) { + if (ctxt->value->floatval < f - 0.5) + ctxt->value->floatval = f - 1; + else + ctxt->value->floatval = f; + if (ctxt->value->floatval == 0) + ctxt->value->floatval = xmlXPathNZERO; + } else { + if (ctxt->value->floatval < f + 0.5) + ctxt->value->floatval = f; + else + ctxt->value->floatval = f + 1; + } +} + +/************************************************************************ + * * + * The Parser * + * * + ************************************************************************/ + +/* + * a couple of forward declarations since we use a recursive call based + * implementation. + */ +static void xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt); +static void xmlXPathCompPredicate(xmlXPathParserContextPtr ctxt, int filter); +static void xmlXPathCompLocationPath(xmlXPathParserContextPtr ctxt); +static void xmlXPathCompRelativeLocationPath(xmlXPathParserContextPtr ctxt); +static xmlChar * xmlXPathParseNameComplex(xmlXPathParserContextPtr ctxt, + int qualified); + +/** + * xmlXPathCurrentChar: + * @ctxt: the XPath parser context + * @cur: pointer to the beginning of the char + * @len: pointer to the length of the char read + * + * The current char value, if using UTF-8 this may actually span multiple + * bytes in the input buffer. 
+ * + * Returns the current char value and its length + */ + +static int +xmlXPathCurrentChar(xmlXPathParserContextPtr ctxt, int *len) { + unsigned char c; + unsigned int val; + const xmlChar *cur; + + if (ctxt == NULL) + return(0); + cur = ctxt->cur; + + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + c = *cur; + if (c & 0x80) { + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } else { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + if (!IS_CHAR(val)) { + XP_ERROR0(XPATH_INVALID_CHAR_ERROR); + } + return(val); + } else { + /* 1-byte code */ + *len = 1; + return((int) *cur); + } +encoding_error: + /* + * If we detect an UTF8 error that probably mean that the + * input encoding didn't get properly advertized in the + * declaration header. Report the error and switch the encoding + * to ISO-Latin-1 (if you don't like this policy, just declare the + * encoding !) + */ + *len = 0; + XP_ERROR0(XPATH_ENCODING_ERROR); +} + +/** + * xmlXPathParseNCName: + * @ctxt: the XPath Parser context + * + * parse an XML namespace non qualified name. + * + * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* + * + * [NS 4] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | + * CombiningChar | Extender + * + * Returns the namespace name or NULL + */ + +xmlChar * +xmlXPathParseNCName(xmlXPathParserContextPtr ctxt) { + const xmlChar *in; + xmlChar *ret; + int count = 0; + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->cur; + if (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_')) { + in++; + while (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '.') || + (*in == '-')) + in++; + if ((*in == ' ') || (*in == '>') || (*in == '/') || + (*in == '[') || (*in == ']') || (*in == ':') || + (*in == '@') || (*in == '*')) { + count = in - ctxt->cur; + if (count == 0) + return(NULL); + ret = xmlStrndup(ctxt->cur, count); + ctxt->cur = in; + return(ret); + } + } + return(xmlXPathParseNameComplex(ctxt, 0)); +} + + +/** + * xmlXPathParseQName: + * @ctxt: the XPath Parser context + * @prefix: a xmlChar ** + * + * parse an XML qualified name + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns the function returns the local part, and prefix is updated + * to get the Prefix if any. + */ + +static xmlChar * +xmlXPathParseQName(xmlXPathParserContextPtr ctxt, xmlChar **prefix) { + xmlChar *ret = NULL; + + *prefix = NULL; + ret = xmlXPathParseNCName(ctxt); + if (CUR == ':') { + *prefix = ret; + NEXT; + ret = xmlXPathParseNCName(ctxt); + } + return(ret); +} + +/** + * xmlXPathParseName: + * @ctxt: the XPath Parser context + * + * parse an XML name + * + * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * Returns the namespace name or NULL + */ + +xmlChar * +xmlXPathParseName(xmlXPathParserContextPtr ctxt) { + const xmlChar *in; + xmlChar *ret; + int count = 0; + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->cur; + if (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_') || (*in == ':')) { + in++; + while (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == ':') || (*in == '.')) + in++; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->cur; + ret = xmlStrndup(ctxt->cur, count); + ctxt->cur = in; + return(ret); + } + } + return(xmlXPathParseNameComplex(ctxt, 1)); +} + +static xmlChar * +xmlXPathParseNameComplex(xmlXPathParserContextPtr ctxt, int qualified) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + + /* + * Handler for more complex cases + */ + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (c == '[') || (c == ']') || (c == '@') || /* accelerators */ + (c == '*') || /* accelerators */ + (!IS_LETTER(c) && (c != '_') && + ((qualified) && (c != ':')))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || ((qualified) && (c == ':')) || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + if (len >= XML_MAX_NAMELEN) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. 
+ */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + XP_ERROR0(XPATH_MEMORY_ERROR); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ + (c == '.') || (c == '-') || + (c == '_') || ((qualified) && (c == ':')) || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + XP_ERROR0(XPATH_MEMORY_ERROR); + } + } + COPY_BUF(l,buffer,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + buffer[len] = 0; + return(buffer); + } + } + if (len == 0) + return(NULL); + return(xmlStrndup(buf, len)); +} + +#define MAX_FRAC 20 + +static double my_pow10[MAX_FRAC] = { + 1.0, 10.0, 100.0, 1000.0, 10000.0, + 100000.0, 1000000.0, 10000000.0, 100000000.0, 1000000000.0, + 10000000000.0, 100000000000.0, 1000000000000.0, 10000000000000.0, + 100000000000000.0, + 1000000000000000.0, 10000000000000000.0, 100000000000000000.0, + 1000000000000000000.0, 10000000000000000000.0 +}; + +/** + * xmlXPathStringEvalNumber: + * @str: A string to scan + * + * [30a] Float ::= Number ('e' Digits?)? + * + * [30] Number ::= Digits ('.' Digits?)? + * | '.' Digits + * [31] Digits ::= [0-9]+ + * + * Compile a Number in the string + * In complement of the Number expression, this function also handles + * negative values : '-' Number. + * + * Returns the double value. 
+ */ +double +xmlXPathStringEvalNumber(const xmlChar *str) { + const xmlChar *cur = str; + double ret; + int ok = 0; + int isneg = 0; + int exponent = 0; + int is_exponent_negative = 0; +#ifdef __GNUC__ + unsigned long tmp = 0; + double temp; +#endif + if (cur == NULL) return(0); + while (IS_BLANK(*cur)) cur++; + if ((*cur != '.') && ((*cur < '0') || (*cur > '9')) && (*cur != '-')) { + return(xmlXPathNAN); + } + if (*cur == '-') { + isneg = 1; + cur++; + } + +#ifdef __GNUC__ + /* + * tmp/temp is a workaround against a gcc compiler bug + * http://veillard.com/gcc.bug + */ + ret = 0; + while ((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10; + tmp = (*cur - '0'); + ok = 1; + cur++; + temp = (double) tmp; + ret = ret + temp; + } +#else + ret = 0; + while ((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10 + (*cur - '0'); + ok = 1; + cur++; + } +#endif + + if (*cur == '.') { + int v, frac = 0; + double fraction = 0; + + cur++; + if (((*cur < '0') || (*cur > '9')) && (!ok)) { + return(xmlXPathNAN); + } + while (((*cur >= '0') && (*cur <= '9')) && (frac < MAX_FRAC)) { + v = (*cur - '0'); + fraction = fraction * 10 + v; + frac = frac + 1; + cur++; + } + fraction /= my_pow10[frac]; + ret = ret + fraction; + while ((*cur >= '0') && (*cur <= '9')) + cur++; + } + if ((*cur == 'e') || (*cur == 'E')) { + cur++; + if (*cur == '-') { + is_exponent_negative = 1; + cur++; + } + while ((*cur >= '0') && (*cur <= '9')) { + exponent = exponent * 10 + (*cur - '0'); + cur++; + } + } + while (IS_BLANK(*cur)) cur++; + if (*cur != 0) return(xmlXPathNAN); + if (isneg) ret = -ret; + if (is_exponent_negative) exponent = -exponent; + ret *= pow(10.0, (double)exponent); + return(ret); +} + +/** + * xmlXPathCompNumber: + * @ctxt: the XPath Parser context + * + * [30] Number ::= Digits ('.' Digits?)? + * | '.' 
Digits + * [31] Digits ::= [0-9]+ + * + * Compile a Number, then push it on the stack + * + */ +static void +xmlXPathCompNumber(xmlXPathParserContextPtr ctxt) +{ + double ret = 0.0; + double mult = 1; + int ok = 0; + int exponent = 0; + int is_exponent_negative = 0; +#ifdef __GNUC__ + unsigned long tmp = 0; + double temp; +#endif + + CHECK_ERROR; + if ((CUR != '.') && ((CUR < '0') || (CUR > '9'))) { + XP_ERROR(XPATH_NUMBER_ERROR); + } +#ifdef __GNUC__ + /* + * tmp/temp is a workaround against a gcc compiler bug + * http://veillard.com/gcc.bug + */ + ret = 0; + while ((CUR >= '0') && (CUR <= '9')) { + ret = ret * 10; + tmp = (CUR - '0'); + ok = 1; + NEXT; + temp = (double) tmp; + ret = ret + temp; + } +#else + ret = 0; + while ((CUR >= '0') && (CUR <= '9')) { + ret = ret * 10 + (CUR - '0'); + ok = 1; + NEXT; + } +#endif + if (CUR == '.') { + NEXT; + if (((CUR < '0') || (CUR > '9')) && (!ok)) { + XP_ERROR(XPATH_NUMBER_ERROR); + } + while ((CUR >= '0') && (CUR <= '9')) { + mult /= 10; + ret = ret + (CUR - '0') * mult; + NEXT; + } + } + if ((CUR == 'e') || (CUR == 'E')) { + NEXT; + if (CUR == '-') { + is_exponent_negative = 1; + NEXT; + } + while ((CUR >= '0') && (CUR <= '9')) { + exponent = exponent * 10 + (CUR - '0'); + NEXT; + } + if (is_exponent_negative) + exponent = -exponent; + ret *= pow(10.0, (double) exponent); + } + PUSH_LONG_EXPR(XPATH_OP_VALUE, XPATH_NUMBER, 0, 0, + xmlXPathNewFloat(ret), NULL); +} + +/** + * xmlXPathParseLiteral: + * @ctxt: the XPath Parser context + * + * Parse a Literal + * + * [29] Literal ::= '"' [^"]* '"' + * | "'" [^']* "'" + * + * Returns the value found or NULL in case of error + */ +static xmlChar * +xmlXPathParseLiteral(xmlXPathParserContextPtr ctxt) { + const xmlChar *q; + xmlChar *ret = NULL; + + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) + NEXT; + if (!IS_CHAR(CUR)) { + XP_ERROR0(XPATH_UNFINISHED_LITERAL_ERROR); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR 
== '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_CHAR(CUR)) { + XP_ERROR0(XPATH_UNFINISHED_LITERAL_ERROR); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + XP_ERROR0(XPATH_START_LITERAL_ERROR); + } + return(ret); +} + +/** + * xmlXPathCompLiteral: + * @ctxt: the XPath Parser context + * + * Parse a Literal and push it on the stack. + * + * [29] Literal ::= '"' [^"]* '"' + * | "'" [^']* "'" + * + * TODO: xmlXPathCompLiteral memory allocation could be improved. + */ +static void +xmlXPathCompLiteral(xmlXPathParserContextPtr ctxt) { + const xmlChar *q; + xmlChar *ret = NULL; + + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) + NEXT; + if (!IS_CHAR(CUR)) { + XP_ERROR(XPATH_UNFINISHED_LITERAL_ERROR); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_CHAR(CUR)) { + XP_ERROR(XPATH_UNFINISHED_LITERAL_ERROR); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + XP_ERROR(XPATH_START_LITERAL_ERROR); + } + if (ret == NULL) return; + PUSH_LONG_EXPR(XPATH_OP_VALUE, XPATH_STRING, 0, 0, + xmlXPathNewString(ret), NULL); + xmlFree(ret); +} + +/** + * xmlXPathCompVariableReference: + * @ctxt: the XPath Parser context + * + * Parse a VariableReference, evaluate it and push it on the stack. + * + * The variable bindings consist of a mapping from variable names + * to variable values. The value of a variable is an object, which + * of any of the types that are possible for the value of an expression, + * and may also be of additional types not specified here. + * + * Early evaluation is possible since: + * The variable bindings [...] used to evaluate a subexpression are + * always the same as those used to evaluate the containing expression. 
+ * + * [36] VariableReference ::= '$' QName + */ +static void +xmlXPathCompVariableReference(xmlXPathParserContextPtr ctxt) { + xmlChar *name; + xmlChar *prefix; + + SKIP_BLANKS; + if (CUR != '$') { + XP_ERROR(XPATH_VARIABLE_REF_ERROR); + } + NEXT; + name = xmlXPathParseQName(ctxt, &prefix); + if (name == NULL) { + XP_ERROR(XPATH_VARIABLE_REF_ERROR); + } + ctxt->comp->last = -1; + PUSH_LONG_EXPR(XPATH_OP_VARIABLE, 0, 0, 0, + name, prefix); + SKIP_BLANKS; +} + +/** + * xmlXPathIsNodeType: + * @name: a name string + * + * Is the name given a NodeType one. + * + * [38] NodeType ::= 'comment' + * | 'text' + * | 'processing-instruction' + * | 'node' + * + * Returns 1 if true 0 otherwise + */ +int +xmlXPathIsNodeType(const xmlChar *name) { + if (name == NULL) + return(0); + + if (xmlStrEqual(name, BAD_CAST "node")) + return(1); + if (xmlStrEqual(name, BAD_CAST "text")) + return(1); + if (xmlStrEqual(name, BAD_CAST "comment")) + return(1); + if (xmlStrEqual(name, BAD_CAST "processing-instruction")) + return(1); + return(0); +} + +/** + * xmlXPathCompFunctionCall: + * @ctxt: the XPath Parser context + * + * [16] FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument)*)? 
')' + * [17] Argument ::= Expr + * + * Compile a function call, the evaluation of all arguments are + * pushed on the stack + */ +static void +xmlXPathCompFunctionCall(xmlXPathParserContextPtr ctxt) { + xmlChar *name; + xmlChar *prefix; + int nbargs = 0; + + name = xmlXPathParseQName(ctxt, &prefix); + if (name == NULL) { + XP_ERROR(XPATH_EXPR_ERROR); + } + SKIP_BLANKS; +#ifdef DEBUG_EXPR + if (prefix == NULL) + xmlGenericError(xmlGenericErrorContext, "Calling function %s\n", + name); + else + xmlGenericError(xmlGenericErrorContext, "Calling function %s:%s\n", + prefix, name); +#endif + + if (CUR != '(') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + SKIP_BLANKS; + + ctxt->comp->last = -1; + while (CUR != ')') { + int op1 = ctxt->comp->last; + ctxt->comp->last = -1; + xmlXPathCompileExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_ARG, op1, ctxt->comp->last, 0, 0); + nbargs++; + if (CUR == ')') break; + if (CUR != ',') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + SKIP_BLANKS; + } + PUSH_LONG_EXPR(XPATH_OP_FUNCTION, nbargs, 0, 0, + name, prefix); + NEXT; + SKIP_BLANKS; +} + +/** + * xmlXPathCompPrimaryExpr: + * @ctxt: the XPath Parser context + * + * [15] PrimaryExpr ::= VariableReference + * | '(' Expr ')' + * | Literal + * | Number + * | FunctionCall + * + * Compile a primary expression. + */ +static void +xmlXPathCompPrimaryExpr(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if (CUR == '$') xmlXPathCompVariableReference(ctxt); + else if (CUR == '(') { + NEXT; + SKIP_BLANKS; + xmlXPathCompileExpr(ctxt); + CHECK_ERROR; + if (CUR != ')') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + SKIP_BLANKS; + } else if (IS_DIGIT(CUR) || (CUR == '.' 
&& IS_DIGIT(NXT(1)))) { + xmlXPathCompNumber(ctxt); + } else if ((CUR == '\'') || (CUR == '"')) { + xmlXPathCompLiteral(ctxt); + } else { + xmlXPathCompFunctionCall(ctxt); + } + SKIP_BLANKS; +} + +/** + * xmlXPathCompFilterExpr: + * @ctxt: the XPath Parser context + * + * [20] FilterExpr ::= PrimaryExpr + * | FilterExpr Predicate + * + * Compile a filter expression. + * Square brackets are used to filter expressions in the same way that + * they are used in location paths. It is an error if the expression to + * be filtered does not evaluate to a node-set. The context node list + * used for evaluating the expression in square brackets is the node-set + * to be filtered listed in document order. + */ + +static void +xmlXPathCompFilterExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompPrimaryExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + + while (CUR == '[') { + xmlXPathCompPredicate(ctxt, 1); + SKIP_BLANKS; + } + + +} + +/** + * xmlXPathScanName: + * @ctxt: the XPath Parser context + * + * Trickery: parse an XML name but without consuming the input flow + * Needed to avoid insanity in the parser state. + * + * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL + */ + +static xmlChar * +xmlXPathScanName(xmlXPathParserContextPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN]; + int len = 0; + + SKIP_BLANKS; + if (!IS_LETTER(CUR) && (CUR != '_') && + (CUR != ':')) { + return(NULL); + } + + while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || + (NXT(len) == '.') || (NXT(len) == '-') || + (NXT(len) == '_') || (NXT(len) == ':') || + (IS_COMBINING(NXT(len))) || + (IS_EXTENDER(NXT(len)))) { + buf[len] = NXT(len); + len++; + if (len >= XML_MAX_NAMELEN) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanName: reached XML_MAX_NAMELEN limit\n"); + while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || + (NXT(len) == '.') || (NXT(len) == '-') || + (NXT(len) == '_') || (NXT(len) == ':') || + (IS_COMBINING(NXT(len))) || + (IS_EXTENDER(NXT(len)))) + len++; + break; + } + } + return(xmlStrndup(buf, len)); +} + +/** + * xmlXPathCompPathExpr: + * @ctxt: the XPath Parser context + * + * [19] PathExpr ::= LocationPath + * | FilterExpr + * | FilterExpr '/' RelativeLocationPath + * | FilterExpr '//' RelativeLocationPath + * + * Compile a path expression. + * The / operator and // operators combine an arbitrary expression + * and a relative location path. It is an error if the expression + * does not evaluate to a node-set. + * The / operator does composition in the same way as when / is + * used in a location path. As in location paths, // is short for + * /descendant-or-self::node()/. + */ + +static void +xmlXPathCompPathExpr(xmlXPathParserContextPtr ctxt) { + int lc = 1; /* Should we branch to LocationPath ? */ + xmlChar *name = NULL; /* we may have to preparse a name to find out */ + + SKIP_BLANKS; + if ((CUR == '$') || (CUR == '(') || (IS_DIGIT(CUR)) || + (CUR == '\'') || (CUR == '"') || (CUR == '.' 
&& IS_DIGIT(NXT(1)))) { + lc = 0; + } else if (CUR == '*') { + /* relative or absolute location path */ + lc = 1; + } else if (CUR == '/') { + /* relative or absolute location path */ + lc = 1; + } else if (CUR == '@') { + /* relative abbreviated attribute location path */ + lc = 1; + } else if (CUR == '.') { + /* relative abbreviated attribute location path */ + lc = 1; + } else { + /* + * Problem is finding if we have a name here whether it's: + * - a nodetype + * - a function call in which case it's followed by '(' + * - an axis in which case it's followed by ':' + * - a element name + * We do an a priori analysis here rather than having to + * maintain parsed token content through the recursive function + * calls. This looks uglier but makes the code quite easier to + * read/write/debug. + */ + SKIP_BLANKS; + name = xmlXPathScanName(ctxt); + if ((name != NULL) && (xmlStrstr(name, (xmlChar *) "::") != NULL)) { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: Axis\n"); +#endif + lc = 1; + xmlFree(name); + } else if (name != NULL) { + int len =xmlStrlen(name); + int blank = 0; + + + while (NXT(len) != 0) { + if (NXT(len) == '/') { + /* element name */ +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + lc = 1; + break; + } else if (IS_BLANK(NXT(len))) { + /* skip to next */ + blank = 1; + } else if (NXT(len) == ':') { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + lc = 1; + break; + } else if ((NXT(len) == '(')) { + /* Note Type or Function */ + if (xmlXPathIsNodeType(name)) { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: Type search\n"); +#endif + lc = 1; + } else { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: function call\n"); +#endif + lc = 0; + } + break; + } else if ((NXT(len) == '[')) { + /* element name */ +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, 
+ "PathExpr: AbbrRelLocation\n"); +#endif + lc = 1; + break; + } else if ((NXT(len) == '<') || (NXT(len) == '>') || + (NXT(len) == '=')) { + lc = 1; + break; + } else { + lc = 1; + break; + } + len++; + } + if (NXT(len) == 0) { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + /* element name */ + lc = 1; + } + xmlFree(name); + } else { + /* make sure all cases are covered explicitely */ + XP_ERROR(XPATH_EXPR_ERROR); + } + } + + if (lc) { + if (CUR == '/') { + PUSH_LEAVE_EXPR(XPATH_OP_ROOT, 0, 0); + } else { + PUSH_LEAVE_EXPR(XPATH_OP_NODE, 0, 0); + } + xmlXPathCompLocationPath(ctxt); + } else { + xmlXPathCompFilterExpr(ctxt); + CHECK_ERROR; + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + + PUSH_LONG_EXPR(XPATH_OP_COLLECT, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + PUSH_UNARY_EXPR(XPATH_OP_RESET, ctxt->comp->last, 1, 0); + + xmlXPathCompRelativeLocationPath(ctxt); + } else if (CUR == '/') { + xmlXPathCompRelativeLocationPath(ctxt); + } + } + SKIP_BLANKS; +} + +/** + * xmlXPathCompUnionExpr: + * @ctxt: the XPath Parser context + * + * [18] UnionExpr ::= PathExpr + * | UnionExpr '|' PathExpr + * + * Compile an union expression. + */ + +static void +xmlXPathCompUnionExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompPathExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while (CUR == '|') { + int op1 = ctxt->comp->last; + PUSH_LEAVE_EXPR(XPATH_OP_NODE, 0, 0); + + NEXT; + SKIP_BLANKS; + xmlXPathCompPathExpr(ctxt); + + PUSH_BINARY_EXPR(XPATH_OP_UNION, op1, ctxt->comp->last, 0, 0); + + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompUnaryExpr: + * @ctxt: the XPath Parser context + * + * [27] UnaryExpr ::= UnionExpr + * | '-' UnaryExpr + * + * Compile an unary expression. 
+ */ + +static void +xmlXPathCompUnaryExpr(xmlXPathParserContextPtr ctxt) { + int minus = 0; + int found = 0; + + SKIP_BLANKS; + while (CUR == '-') { + minus = 1 - minus; + found = 1; + NEXT; + SKIP_BLANKS; + } + + xmlXPathCompUnionExpr(ctxt); + CHECK_ERROR; + if (found) { + if (minus) + PUSH_UNARY_EXPR(XPATH_OP_PLUS, ctxt->comp->last, 2, 0); + else + PUSH_UNARY_EXPR(XPATH_OP_PLUS, ctxt->comp->last, 3, 0); + } +} + +/** + * xmlXPathCompMultiplicativeExpr: + * @ctxt: the XPath Parser context + * + * [26] MultiplicativeExpr ::= UnaryExpr + * | MultiplicativeExpr MultiplyOperator UnaryExpr + * | MultiplicativeExpr 'div' UnaryExpr + * | MultiplicativeExpr 'mod' UnaryExpr + * [34] MultiplyOperator ::= '*' + * + * Compile an Additive expression. + */ + +static void +xmlXPathCompMultiplicativeExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompUnaryExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '*') || + ((CUR == 'd') && (NXT(1) == 'i') && (NXT(2) == 'v')) || + ((CUR == 'm') && (NXT(1) == 'o') && (NXT(2) == 'd'))) { + int op = -1; + int op1 = ctxt->comp->last; + + if (CUR == '*') { + op = 0; + NEXT; + } else if (CUR == 'd') { + op = 1; + SKIP(3); + } else if (CUR == 'm') { + op = 2; + SKIP(3); + } + SKIP_BLANKS; + xmlXPathCompUnaryExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_MULT, op1, ctxt->comp->last, op, 0); + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompAdditiveExpr: + * @ctxt: the XPath Parser context + * + * [25] AdditiveExpr ::= MultiplicativeExpr + * | AdditiveExpr '+' MultiplicativeExpr + * | AdditiveExpr '-' MultiplicativeExpr + * + * Compile an Additive expression. 
+ */ + +static void +xmlXPathCompAdditiveExpr(xmlXPathParserContextPtr ctxt) { + + xmlXPathCompMultiplicativeExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '+') || (CUR == '-')) { + int plus; + int op1 = ctxt->comp->last; + + if (CUR == '+') plus = 1; + else plus = 0; + NEXT; + SKIP_BLANKS; + xmlXPathCompMultiplicativeExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_PLUS, op1, ctxt->comp->last, plus, 0); + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompRelationalExpr: + * @ctxt: the XPath Parser context + * + * [24] RelationalExpr ::= AdditiveExpr + * | RelationalExpr '<' AdditiveExpr + * | RelationalExpr '>' AdditiveExpr + * | RelationalExpr '<=' AdditiveExpr + * | RelationalExpr '>=' AdditiveExpr + * + * A <= B > C is allowed ? Answer from James, yes with + * (AdditiveExpr <= AdditiveExpr) > AdditiveExpr + * which is basically what got implemented. + * + * Compile a Relational expression, then push the result + * on the stack + */ + +static void +xmlXPathCompRelationalExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompAdditiveExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '<') || + (CUR == '>') || + ((CUR == '<') && (NXT(1) == '=')) || + ((CUR == '>') && (NXT(1) == '='))) { + int inf, strict; + int op1 = ctxt->comp->last; + + if (CUR == '<') inf = 1; + else inf = 0; + if (NXT(1) == '=') strict = 0; + else strict = 1; + NEXT; + if (!strict) NEXT; + SKIP_BLANKS; + xmlXPathCompAdditiveExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_CMP, op1, ctxt->comp->last, inf, strict); + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompEqualityExpr: + * @ctxt: the XPath Parser context + * + * [23] EqualityExpr ::= RelationalExpr + * | EqualityExpr '=' RelationalExpr + * | EqualityExpr '!=' RelationalExpr + * + * A != B != C is allowed ? Answer from James, yes with + * (RelationalExpr = RelationalExpr) = RelationalExpr + * (RelationalExpr != RelationalExpr) != RelationalExpr + * which is basically what got implemented. 
+ * + * Compile an Equality expression. + * + */ +static void +xmlXPathCompEqualityExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompRelationalExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '=') || ((CUR == '!') && (NXT(1) == '='))) { + int eq; + int op1 = ctxt->comp->last; + + if (CUR == '=') eq = 1; + else eq = 0; + NEXT; + if (!eq) NEXT; + SKIP_BLANKS; + xmlXPathCompRelationalExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_EQUAL, op1, ctxt->comp->last, eq, 0); + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompAndExpr: + * @ctxt: the XPath Parser context + * + * [22] AndExpr ::= EqualityExpr + * | AndExpr 'and' EqualityExpr + * + * Compile an AND expression. + * + */ +static void +xmlXPathCompAndExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompEqualityExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == 'a') && (NXT(1) == 'n') && (NXT(2) == 'd')) { + int op1 = ctxt->comp->last; + SKIP(3); + SKIP_BLANKS; + xmlXPathCompEqualityExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_AND, op1, ctxt->comp->last, 0, 0); + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompExpr: + * @ctxt: the XPath Parser context + * + * [14] Expr ::= OrExpr + * [21] OrExpr ::= AndExpr + * | OrExpr 'or' AndExpr + * + * Parse and compile an expression + */ +static void +xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompAndExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == 'o') && (NXT(1) == 'r')) { + int op1 = ctxt->comp->last; + SKIP(2); + SKIP_BLANKS; + xmlXPathCompAndExpr(ctxt); + CHECK_ERROR; + PUSH_BINARY_EXPR(XPATH_OP_OR, op1, ctxt->comp->last, 0, 0); + op1 = ctxt->comp->nbStep; + SKIP_BLANKS; + } + if (ctxt->comp->steps[ctxt->comp->last].op != XPATH_OP_VALUE) { + /* more ops could be optimized too */ + PUSH_UNARY_EXPR(XPATH_OP_SORT, ctxt->comp->last , 0, 0); + } +} + +/** + * xmlXPathCompPredicate: + * @ctxt: the XPath Parser context + * @filter: act as a filter + * + * [8] Predicate ::= '[' PredicateExpr ']' + * [9] PredicateExpr ::= Expr 
+ * + * Compile a predicate expression + */ +static void +xmlXPathCompPredicate(xmlXPathParserContextPtr ctxt, int filter) { + int op1 = ctxt->comp->last; + + SKIP_BLANKS; + if (CUR != '[') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + NEXT; + SKIP_BLANKS; + + ctxt->comp->last = -1; + xmlXPathCompileExpr(ctxt); + CHECK_ERROR; + + if (CUR != ']') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + + if (filter) + PUSH_BINARY_EXPR(XPATH_OP_FILTER, op1, ctxt->comp->last, 0, 0); + else + PUSH_BINARY_EXPR(XPATH_OP_PREDICATE, op1, ctxt->comp->last, 0, 0); + + NEXT; + SKIP_BLANKS; +} + +/** + * xmlXPathCompNodeTest: + * @ctxt: the XPath Parser context + * @test: pointer to a xmlXPathTestVal + * @type: pointer to a xmlXPathTypeVal + * @prefix: placeholder for a possible name prefix + * + * [7] NodeTest ::= NameTest + * | NodeType '(' ')' + * | 'processing-instruction' '(' Literal ')' + * + * [37] NameTest ::= '*' + * | NCName ':' '*' + * | QName + * [38] NodeType ::= 'comment' + * | 'text' + * | 'processing-instruction' + * | 'node' + * + * Returns the name found and update @test, @type and @prefix appropriately + */ +static xmlChar * +xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test, + xmlXPathTypeVal *type, const xmlChar **prefix, + xmlChar *name) { + int blanks; + + if ((test == NULL) || (type == NULL) || (prefix == NULL)) { + STRANGE; + return(NULL); + } + *type = 0; + *test = 0; + *prefix = NULL; + SKIP_BLANKS; + + if ((name == NULL) && (CUR == '*')) { + /* + * All elements + */ + NEXT; + *test = NODE_TEST_ALL; + return(NULL); + } + + if (name == NULL) + name = xmlXPathParseNCName(ctxt); + if (name == NULL) { + XP_ERROR0(XPATH_EXPR_ERROR); + } + + blanks = IS_BLANK(CUR); + SKIP_BLANKS; + if (CUR == '(') { + NEXT; + /* + * NodeType or PI search + */ + if (xmlStrEqual(name, BAD_CAST "comment")) + *type = NODE_TYPE_COMMENT; + else if (xmlStrEqual(name, BAD_CAST "node")) + *type = NODE_TYPE_NODE; + else if (xmlStrEqual(name, BAD_CAST 
"processing-instruction")) + *type = NODE_TYPE_PI; + else if (xmlStrEqual(name, BAD_CAST "text")) + *type = NODE_TYPE_TEXT; + else { + if (name != NULL) + xmlFree(name); + XP_ERROR0(XPATH_EXPR_ERROR); + } + + *test = NODE_TEST_TYPE; + + SKIP_BLANKS; + if (*type == NODE_TYPE_PI) { + /* + * Specific case: search a PI by name. + */ + if (name != NULL) + xmlFree(name); + name = NULL; + if (CUR != ')') { + name = xmlXPathParseLiteral(ctxt); + CHECK_ERROR 0; + *test = NODE_TEST_PI; + SKIP_BLANKS; + } + } + if (CUR != ')') { + if (name != NULL) + xmlFree(name); + XP_ERROR0(XPATH_UNCLOSED_ERROR); + } + NEXT; + return(name); + } + *test = NODE_TEST_NAME; + if ((!blanks) && (CUR == ':')) { + NEXT; + + /* + * Since currently the parser context don't have a + * namespace list associated: + * The namespace name for this prefix can be computed + * only at evaluation time. The compilation is done + * outside of any context. + */ +#if 0 + *prefix = xmlXPathNsLookup(ctxt->context, name); + if (name != NULL) + xmlFree(name); + if (*prefix == NULL) { + XP_ERROR0(XPATH_UNDEF_PREFIX_ERROR); + } +#else + *prefix = name; +#endif + + if (CUR == '*') { + /* + * All elements + */ + NEXT; + *test = NODE_TEST_ALL; + return(NULL); + } + + name = xmlXPathParseNCName(ctxt); + if (name == NULL) { + XP_ERROR0(XPATH_EXPR_ERROR); + } + } + return(name); +} + +/** + * xmlXPathIsAxisName: + * @name: a preparsed name token + * + * [6] AxisName ::= 'ancestor' + * | 'ancestor-or-self' + * | 'attribute' + * | 'child' + * | 'descendant' + * | 'descendant-or-self' + * | 'following' + * | 'following-sibling' + * | 'namespace' + * | 'parent' + * | 'preceding' + * | 'preceding-sibling' + * | 'self' + * + * Returns the axis or 0 + */ +static xmlXPathAxisVal +xmlXPathIsAxisName(const xmlChar *name) { + xmlXPathAxisVal ret = 0; + switch (name[0]) { + case 'a': + if (xmlStrEqual(name, BAD_CAST "ancestor")) + ret = AXIS_ANCESTOR; + if (xmlStrEqual(name, BAD_CAST "ancestor-or-self")) + ret = AXIS_ANCESTOR_OR_SELF; + 
if (xmlStrEqual(name, BAD_CAST "attribute")) + ret = AXIS_ATTRIBUTE; + break; + case 'c': + if (xmlStrEqual(name, BAD_CAST "child")) + ret = AXIS_CHILD; + break; + case 'd': + if (xmlStrEqual(name, BAD_CAST "descendant")) + ret = AXIS_DESCENDANT; + if (xmlStrEqual(name, BAD_CAST "descendant-or-self")) + ret = AXIS_DESCENDANT_OR_SELF; + break; + case 'f': + if (xmlStrEqual(name, BAD_CAST "following")) + ret = AXIS_FOLLOWING; + if (xmlStrEqual(name, BAD_CAST "following-sibling")) + ret = AXIS_FOLLOWING_SIBLING; + break; + case 'n': + if (xmlStrEqual(name, BAD_CAST "namespace")) + ret = AXIS_NAMESPACE; + break; + case 'p': + if (xmlStrEqual(name, BAD_CAST "parent")) + ret = AXIS_PARENT; + if (xmlStrEqual(name, BAD_CAST "preceding")) + ret = AXIS_PRECEDING; + if (xmlStrEqual(name, BAD_CAST "preceding-sibling")) + ret = AXIS_PRECEDING_SIBLING; + break; + case 's': + if (xmlStrEqual(name, BAD_CAST "self")) + ret = AXIS_SELF; + break; + } + return(ret); +} + +/** + * xmlXPathCompStep: + * @ctxt: the XPath Parser context + * + * [4] Step ::= AxisSpecifier NodeTest Predicate* + * | AbbreviatedStep + * + * [12] AbbreviatedStep ::= '.' | '..' + * + * [5] AxisSpecifier ::= AxisName '::' + * | AbbreviatedAxisSpecifier + * + * [13] AbbreviatedAxisSpecifier ::= '@'? + * + * Modified for XPtr range support as: + * + * [4xptr] Step ::= AxisSpecifier NodeTest Predicate* + * | AbbreviatedStep + * | 'range-to' '(' Expr ')' Predicate* + * + * Compile one step in a Location Path + * A location step of . is short for self::node(). This is + * particularly useful in conjunction with //. For example, the + * location path .//para is short for + * self::node()/descendant-or-self::node()/child::para + * and so will select all para descendant elements of the context + * node. + * Similarly, a location step of .. is short for parent::node(). + * For example, ../title is short for parent::node()/child::title + * and so will select the title children of the parent of the context + * node. 
+ */ +static void +xmlXPathCompStep(xmlXPathParserContextPtr ctxt) { +#ifdef LIBXML_XPTR_ENABLED + int rangeto = 0; + int op2 = -1; +#endif + + SKIP_BLANKS; + if ((CUR == '.') && (NXT(1) == '.')) { + SKIP(2); + SKIP_BLANKS; + PUSH_LONG_EXPR(XPATH_OP_COLLECT, AXIS_PARENT, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + } else if (CUR == '.') { + NEXT; + SKIP_BLANKS; + } else { + xmlChar *name = NULL; + const xmlChar *prefix = NULL; + xmlXPathTestVal test; + xmlXPathAxisVal axis = 0; + xmlXPathTypeVal type; + int op1; + + /* + * The modification needed for XPointer change to the production + */ +#ifdef LIBXML_XPTR_ENABLED + if (ctxt->xptr) { + name = xmlXPathParseNCName(ctxt); + if ((name != NULL) && (xmlStrEqual(name, BAD_CAST "range-to"))) { + op2 = ctxt->comp->last; + xmlFree(name); + SKIP_BLANKS; + if (CUR != '(') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + SKIP_BLANKS; + + xmlXPathCompileExpr(ctxt); + /* PUSH_BINARY_EXPR(XPATH_OP_RANGETO, op2, ctxt->comp->last, 0, 0); */ + CHECK_ERROR; + + SKIP_BLANKS; + if (CUR != ')') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + rangeto = 1; + goto eval_predicates; + } + } +#endif + if (CUR == '*') { + axis = AXIS_CHILD; + } else { + if (name == NULL) + name = xmlXPathParseNCName(ctxt); + if (name != NULL) { + axis = xmlXPathIsAxisName(name); + if (axis != 0) { + SKIP_BLANKS; + if ((CUR == ':') && (NXT(1) == ':')) { + SKIP(2); + xmlFree(name); + name = NULL; + } else { + /* an element name can conflict with an axis one :-\ */ + axis = AXIS_CHILD; + } + } else { + axis = AXIS_CHILD; + } + } else if (CUR == '@') { + NEXT; + axis = AXIS_ATTRIBUTE; + } else { + axis = AXIS_CHILD; + } + } + + CHECK_ERROR; + + name = xmlXPathCompNodeTest(ctxt, &test, &type, &prefix, name); + if (test == 0) + return; + +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "Basis : computing new set\n"); +#endif + +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "Basis : "); + if (ctxt->value == NULL) + 
xmlGenericError(xmlGenericErrorContext, "no value\n"); + else if (ctxt->value->nodesetval == NULL) + xmlGenericError(xmlGenericErrorContext, "Empty\n"); + else + xmlGenericErrorContextNodeSet(stdout, ctxt->value->nodesetval); +#endif + +eval_predicates: + op1 = ctxt->comp->last; + ctxt->comp->last = -1; + + SKIP_BLANKS; + while (CUR == '[') { + xmlXPathCompPredicate(ctxt, 0); + } + +#ifdef LIBXML_XPTR_ENABLED + if (rangeto) { + PUSH_BINARY_EXPR(XPATH_OP_RANGETO, op2, op1, 0, 0); + } else +#endif + PUSH_FULL_EXPR(XPATH_OP_COLLECT, op1, ctxt->comp->last, axis, + test, type, (void *)prefix, (void *)name); + + } +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "Step : "); + if (ctxt->value == NULL) + xmlGenericError(xmlGenericErrorContext, "no value\n"); + else if (ctxt->value->nodesetval == NULL) + xmlGenericError(xmlGenericErrorContext, "Empty\n"); + else + xmlGenericErrorContextNodeSet(xmlGenericErrorContext, + ctxt->value->nodesetval); +#endif +} + +/** + * xmlXPathCompRelativeLocationPath: + * @ctxt: the XPath Parser context + * + * [3] RelativeLocationPath ::= Step + * | RelativeLocationPath '/' Step + * | AbbreviatedRelativeLocationPath + * [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + * + * Compile a relative location path. 
+ */ +static void +xmlXPathCompRelativeLocationPath +(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + PUSH_LONG_EXPR(XPATH_OP_COLLECT, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + } else if (CUR == '/') { + NEXT; + SKIP_BLANKS; + } + xmlXPathCompStep(ctxt); + SKIP_BLANKS; + while (CUR == '/') { + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + PUSH_LONG_EXPR(XPATH_OP_COLLECT, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + xmlXPathCompStep(ctxt); + } else if (CUR == '/') { + NEXT; + SKIP_BLANKS; + xmlXPathCompStep(ctxt); + } + SKIP_BLANKS; + } +} + +/** + * xmlXPathCompLocationPath: + * @ctxt: the XPath Parser context + * + * [1] LocationPath ::= RelativeLocationPath + * | AbsoluteLocationPath + * [2] AbsoluteLocationPath ::= '/' RelativeLocationPath? + * | AbbreviatedAbsoluteLocationPath + * [10] AbbreviatedAbsoluteLocationPath ::= + * '//' RelativeLocationPath + * + * Compile a location path + * + * // is short for /descendant-or-self::node()/. For example, + * //para is short for /descendant-or-self::node()/child::para and + * so will select any para element in the document (even a para element + * that is a document element will be selected by //para since the + * document element node is a child of the root node); div//para is + * short for div/descendant-or-self::node()/child::para and so will + * select all para descendants of div children. 
+ */ +static void +xmlXPathCompLocationPath(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if (CUR != '/') { + xmlXPathCompRelativeLocationPath(ctxt); + } else { + while (CUR == '/') { + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + PUSH_LONG_EXPR(XPATH_OP_COLLECT, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + xmlXPathCompRelativeLocationPath(ctxt); + } else if (CUR == '/') { + NEXT; + SKIP_BLANKS; + if ((CUR != 0 ) && + ((IS_LETTER(CUR)) || (CUR == '_') || (CUR == '.') || + (CUR == '@') || (CUR == '*'))) + xmlXPathCompRelativeLocationPath(ctxt); + } + } + } +} + +/************************************************************************ + * * + * XPath precompiled expression evaluation * + * * + ************************************************************************/ + +static int +xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op); + +/** + * xmlXPathNodeCollectAndTest: + * @ctxt: the XPath Parser context + * @op: the XPath precompiled step operation + * @first: pointer to the first element in document order + * @last: pointer to the last element in document order + * + * This is the function implementing a step: based on the current list + * of nodes, it builds up a new list, looking at all nodes under that + * axis and selecting them it also do the predicate filtering + * + * Pushes the new NodeSet resulting from the search. 
+ * + * Returns the number of node traversed + */ +static int +xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, + xmlXPathStepOpPtr op, + xmlNodePtr * first, xmlNodePtr * last) +{ + xmlXPathAxisVal axis = op->value; + xmlXPathTestVal test = op->value2; + xmlXPathTypeVal type = op->value3; + const xmlChar *prefix = op->value4; + const xmlChar *name = op->value5; + const xmlChar *URI = NULL; + +#ifdef DEBUG_STEP + int n = 0; +#endif + int i, t = 0; + xmlNodeSetPtr ret, list; + xmlXPathTraversalFunction next = NULL; + void (*addNode) (xmlNodeSetPtr, xmlNodePtr); + xmlNodeSetPtr (*mergeNodeSet) (xmlNodeSetPtr, xmlNodeSetPtr); + xmlNodePtr cur = NULL; + xmlXPathObjectPtr obj; + xmlNodeSetPtr nodelist; + xmlNodePtr tmp; + + CHECK_TYPE0(XPATH_NODESET); + obj = valuePop(ctxt); + addNode = xmlXPathNodeSetAdd; + mergeNodeSet = xmlXPathNodeSetMerge; + if (prefix != NULL) { + URI = xmlXPathNsLookup(ctxt->context, prefix); + if (URI == NULL) + XP_ERROR0(XPATH_UNDEF_PREFIX_ERROR); + } +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "new step : "); +#endif + switch (axis) { + case AXIS_ANCESTOR: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'ancestors' "); +#endif + first = NULL; + next = xmlXPathNextAncestor; + break; + case AXIS_ANCESTOR_OR_SELF: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'ancestors-or-self' "); +#endif + first = NULL; + next = xmlXPathNextAncestorOrSelf; + break; + case AXIS_ATTRIBUTE: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'attributes' "); +#endif + first = NULL; + last = NULL; + next = xmlXPathNextAttribute; + mergeNodeSet = xmlXPathNodeSetMergeUnique; + break; + case AXIS_CHILD: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'child' "); +#endif + last = NULL; + next = xmlXPathNextChild; + mergeNodeSet = xmlXPathNodeSetMergeUnique; + break; + case AXIS_DESCENDANT: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'descendant' 
"); +#endif + last = NULL; + next = xmlXPathNextDescendant; + break; + case AXIS_DESCENDANT_OR_SELF: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'descendant-or-self' "); +#endif + last = NULL; + next = xmlXPathNextDescendantOrSelf; + break; + case AXIS_FOLLOWING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'following' "); +#endif + last = NULL; + next = xmlXPathNextFollowing; + break; + case AXIS_FOLLOWING_SIBLING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'following-siblings' "); +#endif + last = NULL; + next = xmlXPathNextFollowingSibling; + break; + case AXIS_NAMESPACE: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'namespace' "); +#endif + first = NULL; + last = NULL; + next = (xmlXPathTraversalFunction) xmlXPathNextNamespace; + mergeNodeSet = xmlXPathNodeSetMergeUnique; + break; + case AXIS_PARENT: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'parent' "); +#endif + first = NULL; + next = xmlXPathNextParent; + break; + case AXIS_PRECEDING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'preceding' "); +#endif + first = NULL; + next = xmlXPathNextPrecedingInternal; + break; + case AXIS_PRECEDING_SIBLING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'preceding-sibling' "); +#endif + first = NULL; + next = xmlXPathNextPrecedingSibling; + break; + case AXIS_SELF: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "axis 'self' "); +#endif + first = NULL; + last = NULL; + next = xmlXPathNextSelf; + mergeNodeSet = xmlXPathNodeSetMergeUnique; + break; + } + if (next == NULL) + return(0); + + nodelist = obj->nodesetval; + if (nodelist == NULL) { + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathWrapNodeSet(NULL)); + return(0); + } + addNode = xmlXPathNodeSetAddUnique; + ret = NULL; +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + " context contains %d nodes\n", nodelist->nodeNr); + 
switch (test) { + case NODE_TEST_NONE: + xmlGenericError(xmlGenericErrorContext, + " searching for none !!!\n"); + break; + case NODE_TEST_TYPE: + xmlGenericError(xmlGenericErrorContext, + " searching for type %d\n", type); + break; + case NODE_TEST_PI: + xmlGenericError(xmlGenericErrorContext, + " searching for PI !!!\n"); + break; + case NODE_TEST_ALL: + xmlGenericError(xmlGenericErrorContext, + " searching for *\n"); + break; + case NODE_TEST_NS: + xmlGenericError(xmlGenericErrorContext, + " searching for namespace %s\n", + prefix); + break; + case NODE_TEST_NAME: + xmlGenericError(xmlGenericErrorContext, + " searching for name %s\n", name); + if (prefix != NULL) + xmlGenericError(xmlGenericErrorContext, + " with namespace %s\n", prefix); + break; + } + xmlGenericError(xmlGenericErrorContext, "Testing : "); +#endif + /* + * 2.3 Node Tests + * - For the attribute axis, the principal node type is attribute. + * - For the namespace axis, the principal node type is namespace. + * - For other axes, the principal node type is element. + * + * A node test * is true for any node of the + * principal node type. 
For example, child::* will + * select all element children of the context node + */ + tmp = ctxt->context->node; + for (i = 0; i < nodelist->nodeNr; i++) { + ctxt->context->node = nodelist->nodeTab[i]; + + cur = NULL; + list = xmlXPathNodeSetCreate(NULL); + do { + cur = next(ctxt, cur); + if (cur == NULL) + break; + if ((first != NULL) && (*first == cur)) + break; + if (((t % 256) == 0) && + (first != NULL) && (*first != NULL) && + (xmlXPathCmpNodes(*first, cur) >= 0)) + break; + if ((last != NULL) && (*last == cur)) + break; + if (((t % 256) == 0) && + (last != NULL) && (*last != NULL) && + (xmlXPathCmpNodes(cur, *last) >= 0)) + break; + t++; +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, " %s", cur->name); +#endif + switch (test) { + case NODE_TEST_NONE: + ctxt->context->node = tmp; + STRANGE return(t); + case NODE_TEST_TYPE: + if ((cur->type == type) || + ((type == NODE_TYPE_NODE) && + ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_NAMESPACE_DECL) || + (cur->type == XML_ATTRIBUTE_NODE) || + (cur->type == XML_PI_NODE) || + (cur->type == XML_COMMENT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE) || + (cur->type == XML_TEXT_NODE))) || + ((type == NODE_TYPE_TEXT) && + (cur->type == XML_CDATA_SECTION_NODE))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } + break; + case NODE_TEST_PI: + if (cur->type == XML_PI_NODE) { + if ((name != NULL) && + (!xmlStrEqual(name, cur->name))) + break; +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } + break; + case NODE_TEST_ALL: + if (axis == AXIS_ATTRIBUTE) { + if (cur->type == XML_ATTRIBUTE_NODE) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } + } else if (axis == AXIS_NAMESPACE) { + if (cur->type == XML_NAMESPACE_DECL) { +#ifdef DEBUG_STEP + n++; +#endif + xmlXPathNodeSetAddNs(list, ctxt->context->node, + (xmlNsPtr) cur); + } + } else { + if (cur->type == XML_ELEMENT_NODE) { + if (prefix 
== NULL) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } else if ((cur->ns != NULL) && + (xmlStrEqual(URI, cur->ns->href))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } + } + } + break; + case NODE_TEST_NS:{ + TODO; + break; + } + case NODE_TEST_NAME: + switch (cur->type) { + case XML_ELEMENT_NODE: + if (xmlStrEqual(name, cur->name)) { + if (prefix == NULL) { + if (cur->ns == NULL) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } + } else { + if ((cur->ns != NULL) && + (xmlStrEqual(URI, + cur->ns->href))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, cur); + } + } + } + break; + case XML_ATTRIBUTE_NODE:{ + xmlAttrPtr attr = (xmlAttrPtr) cur; + + if (xmlStrEqual(name, attr->name)) { + if (prefix == NULL) { + if ((attr->ns == NULL) || + (attr->ns->prefix == NULL)) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, + (xmlNodePtr) attr); + } + } else { + if ((attr->ns != NULL) && + (xmlStrEqual(URI, + attr->ns-> + href))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(list, + (xmlNodePtr) attr); + } + } + } + break; + } + case XML_NAMESPACE_DECL: + if (cur->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) cur; + + if ((ns->prefix != NULL) && (name != NULL) + && (xmlStrEqual(ns->prefix, name))) { +#ifdef DEBUG_STEP + n++; +#endif + xmlXPathNodeSetAddNs(list, + ctxt->context->node, (xmlNsPtr) cur); + } + } + break; + default: + break; + } + break; + break; + } + } while (cur != NULL); + + /* + * If there is some predicate filtering do it now + */ + if ((op->ch2 != -1) && (list != NULL) && (list->nodeNr > 0)) { + xmlXPathObjectPtr obj2; + + valuePush(ctxt, xmlXPathWrapNodeSet(list)); + xmlXPathCompOpEval(ctxt, &ctxt->comp->steps[op->ch2]); + CHECK_TYPE0(XPATH_NODESET); + obj2 = valuePop(ctxt); + list = obj2->nodesetval; + obj2->nodesetval = NULL; + xmlXPathFreeObject(obj2); + } + if (ret == NULL) { + ret = list; + } else { + ret = mergeNodeSet(ret, list); + xmlXPathFreeNodeSet(list); + } + } + ctxt->context->node = 
tmp; +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "\nExamined %d nodes, found %d nodes at that step\n", + t, n); +#endif + valuePush(ctxt, xmlXPathWrapNodeSet(ret)); + if ((obj->boolval) && (obj->user != NULL)) { + ctxt->value->boolval = 1; + ctxt->value->user = obj->user; + obj->user = NULL; + obj->boolval = 0; + } + xmlXPathFreeObject(obj); + return(t); +} + +/** + * xmlXPathNodeCollectAndTestNth: + * @ctxt: the XPath Parser context + * @op: the XPath precompiled step operation + * @indx: the index to collect + * @first: pointer to the first element in document order + * @last: pointer to the last element in document order + * + * This is the function implementing a step: based on the current list + * of nodes, it builds up a new list, looking at all nodes under that + * axis and selecting them it also do the predicate filtering + * + * Pushes the new NodeSet resulting from the search. + * Returns the number of node traversed + */ +static int +xmlXPathNodeCollectAndTestNth(xmlXPathParserContextPtr ctxt, + xmlXPathStepOpPtr op, int indx, + xmlNodePtr * first, xmlNodePtr * last) +{ + xmlXPathAxisVal axis = op->value; + xmlXPathTestVal test = op->value2; + xmlXPathTypeVal type = op->value3; + const xmlChar *prefix = op->value4; + const xmlChar *name = op->value5; + const xmlChar *URI = NULL; + int n = 0, t = 0; + + int i; + xmlNodeSetPtr list; + xmlXPathTraversalFunction next = NULL; + void (*addNode) (xmlNodeSetPtr, xmlNodePtr); + xmlNodePtr cur = NULL; + xmlXPathObjectPtr obj; + xmlNodeSetPtr nodelist; + xmlNodePtr tmp; + + CHECK_TYPE0(XPATH_NODESET); + obj = valuePop(ctxt); + addNode = xmlXPathNodeSetAdd; + if (prefix != NULL) { + URI = xmlXPathNsLookup(ctxt->context, prefix); + if (URI == NULL) + XP_ERROR0(XPATH_UNDEF_PREFIX_ERROR); + } +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "new step : "); + if (first != NULL) { + if (*first != NULL) + xmlGenericError(xmlGenericErrorContext, "first = %s ", + (*first)->name); + else + 
xmlGenericError(xmlGenericErrorContext, "first = NULL "); + } + if (last != NULL) { + if (*last != NULL) + xmlGenericError(xmlGenericErrorContext, "last = %s ", + (*last)->name); + else + xmlGenericError(xmlGenericErrorContext, "last = NULL "); + } +#endif + switch (axis) { + case AXIS_ANCESTOR: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'ancestors' "); +#endif + first = NULL; + next = xmlXPathNextAncestor; + break; + case AXIS_ANCESTOR_OR_SELF: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, + "axis 'ancestors-or-self' "); +#endif + first = NULL; + next = xmlXPathNextAncestorOrSelf; + break; + case AXIS_ATTRIBUTE: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'attributes' "); +#endif + first = NULL; + last = NULL; + next = xmlXPathNextAttribute; + break; + case AXIS_CHILD: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'child' "); +#endif + last = NULL; + next = xmlXPathNextChild; + break; + case AXIS_DESCENDANT: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'descendant' "); +#endif + last = NULL; + next = xmlXPathNextDescendant; + break; + case AXIS_DESCENDANT_OR_SELF: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, + "axis 'descendant-or-self' "); +#endif + last = NULL; + next = xmlXPathNextDescendantOrSelf; + break; + case AXIS_FOLLOWING: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'following' "); +#endif + last = NULL; + next = xmlXPathNextFollowing; + break; + case AXIS_FOLLOWING_SIBLING: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, + "axis 'following-siblings' "); +#endif + last = NULL; + next = xmlXPathNextFollowingSibling; + break; + case AXIS_NAMESPACE: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'namespace' "); +#endif + last = NULL; + first = NULL; + next = (xmlXPathTraversalFunction) xmlXPathNextNamespace; + break; + case AXIS_PARENT: +#ifdef 
DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'parent' "); +#endif + first = NULL; + next = xmlXPathNextParent; + break; + case AXIS_PRECEDING: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'preceding' "); +#endif + first = NULL; + next = xmlXPathNextPrecedingInternal; + break; + case AXIS_PRECEDING_SIBLING: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, + "axis 'preceding-sibling' "); +#endif + first = NULL; + next = xmlXPathNextPrecedingSibling; + break; + case AXIS_SELF: +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, "axis 'self' "); +#endif + first = NULL; + last = NULL; + next = xmlXPathNextSelf; + break; + } + if (next == NULL) + return(0); + + nodelist = obj->nodesetval; + if (nodelist == NULL) { + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathWrapNodeSet(NULL)); + return(0); + } + addNode = xmlXPathNodeSetAddUnique; +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, + " context contains %d nodes\n", nodelist->nodeNr); + switch (test) { + case NODE_TEST_NONE: + xmlGenericError(xmlGenericErrorContext, + " searching for none !!!\n"); + break; + case NODE_TEST_TYPE: + xmlGenericError(xmlGenericErrorContext, + " searching for type %d\n", type); + break; + case NODE_TEST_PI: + xmlGenericError(xmlGenericErrorContext, + " searching for PI !!!\n"); + break; + case NODE_TEST_ALL: + xmlGenericError(xmlGenericErrorContext, + " searching for *\n"); + break; + case NODE_TEST_NS: + xmlGenericError(xmlGenericErrorContext, + " searching for namespace %s\n", + prefix); + break; + case NODE_TEST_NAME: + xmlGenericError(xmlGenericErrorContext, + " searching for name %s\n", name); + if (prefix != NULL) + xmlGenericError(xmlGenericErrorContext, + " with namespace %s\n", prefix); + break; + } + xmlGenericError(xmlGenericErrorContext, "Testing : "); +#endif + /* + * 2.3 Node Tests + * - For the attribute axis, the principal node type is attribute. 
+ * - For the namespace axis, the principal node type is namespace. + * - For other axes, the principal node type is element. + * + * A node test * is true for any node of the + * principal node type. For example, child::* will + * select all element children of the context node + */ + tmp = ctxt->context->node; + list = xmlXPathNodeSetCreate(NULL); + for (i = 0; i < nodelist->nodeNr; i++) { + ctxt->context->node = nodelist->nodeTab[i]; + + cur = NULL; + n = 0; + do { + cur = next(ctxt, cur); + if (cur == NULL) + break; + if ((first != NULL) && (*first == cur)) + break; + if (((t % 256) == 0) && + (first != NULL) && (*first != NULL) && + (xmlXPathCmpNodes(*first, cur) >= 0)) + break; + if ((last != NULL) && (*last == cur)) + break; + if (((t % 256) == 0) && + (last != NULL) && (*last != NULL) && + (xmlXPathCmpNodes(cur, *last) >= 0)) + break; + t++; + switch (test) { + case NODE_TEST_NONE: + ctxt->context->node = tmp; + STRANGE return(0); + case NODE_TEST_TYPE: + if ((cur->type == type) || + ((type == NODE_TYPE_NODE) && + ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_PI_NODE) || + (cur->type == XML_COMMENT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE) || + (cur->type == XML_TEXT_NODE))) || + ((type == NODE_TYPE_TEXT) && + (cur->type == XML_CDATA_SECTION_NODE))) { + n++; + if (n == indx) + addNode(list, cur); + } + break; + case NODE_TEST_PI: + if (cur->type == XML_PI_NODE) { + if ((name != NULL) && + (!xmlStrEqual(name, cur->name))) + break; + n++; + if (n == indx) + addNode(list, cur); + } + break; + case NODE_TEST_ALL: + if (axis == AXIS_ATTRIBUTE) { + if (cur->type == XML_ATTRIBUTE_NODE) { + n++; + if (n == indx) + addNode(list, cur); + } + } else if (axis == AXIS_NAMESPACE) { + if (cur->type == XML_NAMESPACE_DECL) { + n++; + if (n == indx) + xmlXPathNodeSetAddNs(list, ctxt->context->node, + (xmlNsPtr) cur); + } + } else { + if (cur->type == XML_ELEMENT_NODE) { + if 
(prefix == NULL) { + n++; + if (n == indx) + addNode(list, cur); + } else if ((cur->ns != NULL) && + (xmlStrEqual(URI, cur->ns->href))) { + n++; + if (n == indx) + addNode(list, cur); + } + } + } + break; + case NODE_TEST_NS:{ + TODO; + break; + } + case NODE_TEST_NAME: + switch (cur->type) { + case XML_ELEMENT_NODE: + if (xmlStrEqual(name, cur->name)) { + if (prefix == NULL) { + if (cur->ns == NULL) { + n++; + if (n == indx) + addNode(list, cur); + } + } else { + if ((cur->ns != NULL) && + (xmlStrEqual(URI, + cur->ns->href))) { + n++; + if (n == indx) + addNode(list, cur); + } + } + } + break; + case XML_ATTRIBUTE_NODE:{ + xmlAttrPtr attr = (xmlAttrPtr) cur; + + if (xmlStrEqual(name, attr->name)) { + if (prefix == NULL) { + if ((attr->ns == NULL) || + (attr->ns->prefix == NULL)) { + n++; + if (n == indx) + addNode(list, cur); + } + } else { + if ((attr->ns != NULL) && + (xmlStrEqual(URI, + attr->ns-> + href))) { + n++; + if (n == indx) + addNode(list, cur); + } + } + } + break; + } + case XML_NAMESPACE_DECL: + if (cur->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns = (xmlNsPtr) cur; + + if ((ns->prefix != NULL) && (name != NULL) + && (xmlStrEqual(ns->prefix, name))) { + n++; + if (n == indx) + xmlXPathNodeSetAddNs(list, + ctxt->context->node, (xmlNsPtr) cur); + } + } + break; + default: + break; + } + break; + break; + } + } while (n < indx); + } + ctxt->context->node = tmp; +#ifdef DEBUG_STEP_NTH + xmlGenericError(xmlGenericErrorContext, + "\nExamined %d nodes, found %d nodes at that step\n", + t, list->nodeNr); +#endif + valuePush(ctxt, xmlXPathWrapNodeSet(list)); + if ((obj->boolval) && (obj->user != NULL)) { + ctxt->value->boolval = 1; + ctxt->value->user = obj->user; + obj->user = NULL; + obj->boolval = 0; + } + xmlXPathFreeObject(obj); + return(t); +} + +/** + * xmlXPathCompOpEvalFirst: + * @ctxt: the XPath parser context with the compiled expression + * @op: an XPath compiled operation + * @first: the first elem found so far + * + * Evaluate the Precompiled 
XPath operation searching only the first + * element in document order + * + * Returns the number of examined objects. + */ +static int +xmlXPathCompOpEvalFirst(xmlXPathParserContextPtr ctxt, + xmlXPathStepOpPtr op, xmlNodePtr * first) +{ + int total = 0, cur; + xmlXPathCompExprPtr comp; + xmlXPathObjectPtr arg1, arg2; + + CHECK_ERROR0; + comp = ctxt->comp; + switch (op->op) { + case XPATH_OP_END: + return (0); + case XPATH_OP_UNION: + total = + xmlXPathCompOpEvalFirst(ctxt, &comp->steps[op->ch1], + first); + CHECK_ERROR0; + if ((ctxt->value != NULL) + && (ctxt->value->type == XPATH_NODESET) + && (ctxt->value->nodesetval != NULL) + && (ctxt->value->nodesetval->nodeNr >= 1)) { + /* + * limit tree traversing to first node in the result + */ + xmlXPathNodeSetSort(ctxt->value->nodesetval); + *first = ctxt->value->nodesetval->nodeTab[0]; + } + cur = + xmlXPathCompOpEvalFirst(ctxt, &comp->steps[op->ch2], + first); + CHECK_ERROR0; + CHECK_TYPE0(XPATH_NODESET); + arg2 = valuePop(ctxt); + + CHECK_TYPE0(XPATH_NODESET); + arg1 = valuePop(ctxt); + + arg1->nodesetval = xmlXPathNodeSetMerge(arg1->nodesetval, + arg2->nodesetval); + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + /* optimizer */ + if (total > cur) + xmlXPathCompSwap(op); + return (total + cur); + case XPATH_OP_ROOT: + xmlXPathRoot(ctxt); + return (0); + case XPATH_OP_NODE: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + return (total); + case XPATH_OP_RESET: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + ctxt->context->node = NULL; + return (total); + case XPATH_OP_COLLECT:{ + if (op->ch1 == -1) + return (total); + + total = xmlXPathCompOpEval(ctxt, 
&comp->steps[op->ch1]); + CHECK_ERROR0; + + /* + * Optimization for [n] selection where n is a number + */ + if ((op->ch2 != -1) && + (comp->steps[op->ch2].op == XPATH_OP_PREDICATE) && + (comp->steps[op->ch2].ch1 == -1) && + (comp->steps[op->ch2].ch2 != -1) && + (comp->steps[comp->steps[op->ch2].ch2].op == + XPATH_OP_VALUE)) { + xmlXPathObjectPtr val; + + val = comp->steps[comp->steps[op->ch2].ch2].value4; + if ((val != NULL) && (val->type == XPATH_NUMBER)) { + int indx = (int) val->floatval; + + if (val->floatval == (float) indx) { + xmlXPathNodeCollectAndTestNth(ctxt, op, indx, + first, NULL); + return (total); + } + } + } + total += xmlXPathNodeCollectAndTest(ctxt, op, first, NULL); + return (total); + } + case XPATH_OP_VALUE: + valuePush(ctxt, + xmlXPathObjectCopy((xmlXPathObjectPtr) op->value4)); + return (0); + case XPATH_OP_SORT: + if (op->ch1 != -1) + total += + xmlXPathCompOpEvalFirst(ctxt, &comp->steps[op->ch1], + first); + CHECK_ERROR0; + if ((ctxt->value != NULL) + && (ctxt->value->type == XPATH_NODESET) + && (ctxt->value->nodesetval != NULL)) + xmlXPathNodeSetSort(ctxt->value->nodesetval); + return (total); + default: + return (xmlXPathCompOpEval(ctxt, op)); + } +} + +/** + * xmlXPathCompOpEvalLast: + * @ctxt: the XPath parser context with the compiled expression + * @op: an XPath compiled operation + * @last: the last elem found so far + * + * Evaluate the Precompiled XPath operation searching only the last + * element in document order + * + * Returns the number of node traversed + */ +static int +xmlXPathCompOpEvalLast(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op, + xmlNodePtr * last) +{ + int total = 0, cur; + xmlXPathCompExprPtr comp; + xmlXPathObjectPtr arg1, arg2; + + CHECK_ERROR0; + comp = ctxt->comp; + switch (op->op) { + case XPATH_OP_END: + return (0); + case XPATH_OP_UNION: + total = + xmlXPathCompOpEvalLast(ctxt, &comp->steps[op->ch1], last); + CHECK_ERROR0; + if ((ctxt->value != NULL) + && (ctxt->value->type == XPATH_NODESET) + && 
(ctxt->value->nodesetval != NULL) + && (ctxt->value->nodesetval->nodeNr >= 1)) { + /* + * limit tree traversing to first node in the result + */ + xmlXPathNodeSetSort(ctxt->value->nodesetval); + *last = + ctxt->value->nodesetval->nodeTab[ctxt->value-> + nodesetval->nodeNr - + 1]; + } + cur = + xmlXPathCompOpEvalLast(ctxt, &comp->steps[op->ch2], last); + CHECK_ERROR0; + if ((ctxt->value != NULL) + && (ctxt->value->type == XPATH_NODESET) + && (ctxt->value->nodesetval != NULL) + && (ctxt->value->nodesetval->nodeNr >= 1)) { + } + CHECK_TYPE0(XPATH_NODESET); + arg2 = valuePop(ctxt); + + CHECK_TYPE0(XPATH_NODESET); + arg1 = valuePop(ctxt); + + arg1->nodesetval = xmlXPathNodeSetMerge(arg1->nodesetval, + arg2->nodesetval); + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + /* optimizer */ + if (total > cur) + xmlXPathCompSwap(op); + return (total + cur); + case XPATH_OP_ROOT: + xmlXPathRoot(ctxt); + return (0); + case XPATH_OP_NODE: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + return (total); + case XPATH_OP_RESET: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + ctxt->context->node = NULL; + return (total); + case XPATH_OP_COLLECT:{ + if (op->ch1 == -1) + return (0); + + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + + /* + * Optimization for [n] selection where n is a number + */ + if ((op->ch2 != -1) && + (comp->steps[op->ch2].op == XPATH_OP_PREDICATE) && + (comp->steps[op->ch2].ch1 == -1) && + (comp->steps[op->ch2].ch2 != -1) && + (comp->steps[comp->steps[op->ch2].ch2].op == + XPATH_OP_VALUE)) { + xmlXPathObjectPtr val; + + val = comp->steps[comp->steps[op->ch2].ch2].value4; + if ((val 
!= NULL) && (val->type == XPATH_NUMBER)) { + int indx = (int) val->floatval; + + if (val->floatval == (float) indx) { + total += + xmlXPathNodeCollectAndTestNth(ctxt, op, + indx, NULL, + last); + return (total); + } + } + } + total += xmlXPathNodeCollectAndTest(ctxt, op, NULL, last); + return (total); + } + case XPATH_OP_VALUE: + valuePush(ctxt, + xmlXPathObjectCopy((xmlXPathObjectPtr) op->value4)); + return (0); + case XPATH_OP_SORT: + if (op->ch1 != -1) + total += + xmlXPathCompOpEvalLast(ctxt, &comp->steps[op->ch1], + last); + CHECK_ERROR0; + if ((ctxt->value != NULL) + && (ctxt->value->type == XPATH_NODESET) + && (ctxt->value->nodesetval != NULL)) + xmlXPathNodeSetSort(ctxt->value->nodesetval); + return (total); + default: + return (xmlXPathCompOpEval(ctxt, op)); + } +} + +/** + * xmlXPathCompOpEval: + * @ctxt: the XPath parser context with the compiled expression + * @op: an XPath compiled operation + * + * Evaluate the Precompiled XPath operation + * Returns the number of node traversed + */ +static int +xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op) +{ + int total = 0; + int equal, ret; + xmlXPathCompExprPtr comp; + xmlXPathObjectPtr arg1, arg2; + xmlNodePtr bak; + xmlDocPtr bakd; + int pp; + int cs; + + CHECK_ERROR0; + comp = ctxt->comp; + switch (op->op) { + case XPATH_OP_END: + return (0); + case XPATH_OP_AND: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + xmlXPathBooleanFunction(ctxt, 1); + if ((ctxt->value == NULL) || (ctxt->value->boolval == 0)) + return (total); + arg2 = valuePop(ctxt); + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + if (ctxt->error) { + xmlXPathFreeObject(arg2); + return(0); + } + 
xmlXPathBooleanFunction(ctxt, 1); + arg1 = valuePop(ctxt); + arg1->boolval &= arg2->boolval; + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + return (total); + case XPATH_OP_OR: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + xmlXPathBooleanFunction(ctxt, 1); + if ((ctxt->value == NULL) || (ctxt->value->boolval == 1)) + return (total); + arg2 = valuePop(ctxt); + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + if (ctxt->error) { + xmlXPathFreeObject(arg2); + return(0); + } + xmlXPathBooleanFunction(ctxt, 1); + arg1 = valuePop(ctxt); + arg1->boolval |= arg2->boolval; + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + return (total); + case XPATH_OP_EQUAL: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + if (op->value) + equal = xmlXPathEqualValues(ctxt); + else + equal = xmlXPathNotEqualValues(ctxt); + valuePush(ctxt, xmlXPathNewBoolean(equal)); + return (total); + case XPATH_OP_CMP: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + 
CHECK_ERROR0; + ret = xmlXPathCompareValues(ctxt, op->value, op->value2); + valuePush(ctxt, xmlXPathNewBoolean(ret)); + return (total); + case XPATH_OP_PLUS: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) { + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + } + CHECK_ERROR0; + if (op->value == 0) + xmlXPathSubValues(ctxt); + else if (op->value == 1) + xmlXPathAddValues(ctxt); + else if (op->value == 2) + xmlXPathValueFlipSign(ctxt); + else if (op->value == 3) { + CAST_TO_NUMBER; + CHECK_TYPE0(XPATH_NUMBER); + } + return (total); + case XPATH_OP_MULT: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + if (op->value == 0) + xmlXPathMultValues(ctxt); + else if (op->value == 1) + xmlXPathDivValues(ctxt); + else if (op->value == 2) + xmlXPathModValues(ctxt); + return (total); + case XPATH_OP_UNION: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + pp = ctxt->context->proximityPosition; + cs = ctxt->context->contextSize; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + ctxt->context->doc = bakd; + ctxt->context->node = bak; + ctxt->context->proximityPosition = pp; + ctxt->context->contextSize = cs; + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + CHECK_TYPE0(XPATH_NODESET); + arg2 = valuePop(ctxt); + + 
CHECK_TYPE0(XPATH_NODESET); + arg1 = valuePop(ctxt); + + arg1->nodesetval = xmlXPathNodeSetMerge(arg1->nodesetval, + arg2->nodesetval); + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + return (total); + case XPATH_OP_ROOT: + xmlXPathRoot(ctxt); + return (total); + case XPATH_OP_NODE: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + return (total); + case XPATH_OP_RESET: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + CHECK_ERROR0; + ctxt->context->node = NULL; + return (total); + case XPATH_OP_COLLECT:{ + if (op->ch1 == -1) + return (total); + + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + + /* + * Optimization for [n] selection where n is a number + */ + if ((op->ch2 != -1) && + (comp->steps[op->ch2].op == XPATH_OP_PREDICATE) && + (comp->steps[op->ch2].ch1 == -1) && + (comp->steps[op->ch2].ch2 != -1) && + (comp->steps[comp->steps[op->ch2].ch2].op == + XPATH_OP_VALUE)) { + xmlXPathObjectPtr val; + + val = comp->steps[comp->steps[op->ch2].ch2].value4; + if ((val != NULL) && (val->type == XPATH_NUMBER)) { + int indx = (int) val->floatval; + + if (val->floatval == (float) indx) { + total += + xmlXPathNodeCollectAndTestNth(ctxt, op, + indx, NULL, + NULL); + return (total); + } + } + } + total += xmlXPathNodeCollectAndTest(ctxt, op, NULL, NULL); + return (total); + } + case XPATH_OP_VALUE: + valuePush(ctxt, + xmlXPathObjectCopy((xmlXPathObjectPtr) op->value4)); + return (total); + case XPATH_OP_VARIABLE:{ + xmlXPathObjectPtr val; + + if (op->ch1 != -1) + total += + xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + if (op->value5 == NULL) { + val = xmlXPathVariableLookup(ctxt->context, op->value4); + 
if (val == NULL) { + ctxt->error = XPATH_UNDEF_VARIABLE_ERROR; + return(0); + } + valuePush(ctxt, val); + } else { + const xmlChar *URI; + + URI = xmlXPathNsLookup(ctxt->context, op->value5); + if (URI == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompOpEval: variable %s bound to undefined prefix %s\n", + op->value4, op->value5); + return (total); + } + val = xmlXPathVariableLookupNS(ctxt->context, + op->value4, URI); + if (val == NULL) { + ctxt->error = XPATH_UNDEF_VARIABLE_ERROR; + return(0); + } + valuePush(ctxt, val); + } + return (total); + } + case XPATH_OP_FUNCTION:{ + xmlXPathFunction func; + const xmlChar *oldFunc, *oldFuncURI; + int i; + + if (op->ch1 != -1) + total += + xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + if (ctxt->valueNr < op->value) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompOpEval: parameter error\n"); + ctxt->error = XPATH_INVALID_OPERAND; + return (total); + } + for (i = 0; i < op->value; i++) + if (ctxt->valueTab[(ctxt->valueNr - 1) - i] == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompOpEval: parameter error\n"); + ctxt->error = XPATH_INVALID_OPERAND; + return (total); + } + if (op->cache != NULL) + func = (xmlXPathFunction) op->cache; + else { + const xmlChar *URI = NULL; + + if (op->value5 == NULL) + func = + xmlXPathFunctionLookup(ctxt->context, + op->value4); + else { + URI = xmlXPathNsLookup(ctxt->context, op->value5); + if (URI == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompOpEval: function %s bound to undefined prefix %s\n", + op->value4, op->value5); + return (total); + } + func = xmlXPathFunctionLookupNS(ctxt->context, + op->value4, URI); + } + if (func == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompOpEval: function %s not found\n", + op->value4); + XP_ERROR0(XPATH_UNKNOWN_FUNC_ERROR); + } + op->cache = (void *) func; + op->cacheURI = (void *) URI; + } + oldFunc = ctxt->context->function; + oldFuncURI = 
ctxt->context->functionURI; + ctxt->context->function = op->value4; + ctxt->context->functionURI = op->cacheURI; + func(ctxt, op->value); + ctxt->context->function = oldFunc; + ctxt->context->functionURI = oldFuncURI; + return (total); + } + case XPATH_OP_ARG: + bakd = ctxt->context->doc; + bak = ctxt->context->node; + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + ctxt->context->doc = bakd; + ctxt->context->node = bak; + CHECK_ERROR0; + if (op->ch2 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch2]); + ctxt->context->doc = bakd; + ctxt->context->node = bak; + CHECK_ERROR0; + return (total); + case XPATH_OP_PREDICATE: + case XPATH_OP_FILTER:{ + xmlXPathObjectPtr res; + xmlXPathObjectPtr obj, tmp; + xmlNodeSetPtr newset = NULL; + xmlNodeSetPtr oldset; + xmlNodePtr oldnode; + int i; + + /* + * Optimization for ()[1] selection i.e. the first elem + */ + if ((op->ch1 != -1) && (op->ch2 != -1) && + (comp->steps[op->ch1].op == XPATH_OP_SORT) && + (comp->steps[op->ch2].op == XPATH_OP_VALUE)) { + xmlXPathObjectPtr val; + + val = comp->steps[op->ch2].value4; + if ((val != NULL) && (val->type == XPATH_NUMBER) && + (val->floatval == 1.0)) { + xmlNodePtr first = NULL; + + total += + xmlXPathCompOpEvalFirst(ctxt, + &comp->steps[op->ch1], + &first); + CHECK_ERROR0; + /* + * The nodeset should be in document order, + * Keep only the first value + */ + if ((ctxt->value != NULL) && + (ctxt->value->type == XPATH_NODESET) && + (ctxt->value->nodesetval != NULL) && + (ctxt->value->nodesetval->nodeNr > 1)) + ctxt->value->nodesetval->nodeNr = 1; + return (total); + } + } + /* + * Optimization for ()[last()] selection i.e. 
the last elem + */ + if ((op->ch1 != -1) && (op->ch2 != -1) && + (comp->steps[op->ch1].op == XPATH_OP_SORT) && + (comp->steps[op->ch2].op == XPATH_OP_SORT)) { + int f = comp->steps[op->ch2].ch1; + + if ((f != -1) && + (comp->steps[f].op == XPATH_OP_FUNCTION) && + (comp->steps[f].value5 == NULL) && + (comp->steps[f].value == 0) && + (comp->steps[f].value4 != NULL) && + (xmlStrEqual + (comp->steps[f].value4, BAD_CAST "last"))) { + xmlNodePtr last = NULL; + + total += + xmlXPathCompOpEvalLast(ctxt, + &comp->steps[op->ch1], + &last); + CHECK_ERROR0; + /* + * The nodeset should be in document order, + * Keep only the last value + */ + if ((ctxt->value != NULL) && + (ctxt->value->type == XPATH_NODESET) && + (ctxt->value->nodesetval != NULL) && + (ctxt->value->nodesetval->nodeTab != NULL) && + (ctxt->value->nodesetval->nodeNr > 1)) { + ctxt->value->nodesetval->nodeTab[0] = + ctxt->value->nodesetval->nodeTab[ctxt-> + value-> + nodesetval-> + nodeNr - + 1]; + ctxt->value->nodesetval->nodeNr = 1; + } + return (total); + } + } + + if (op->ch1 != -1) + total += + xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if (op->ch2 == -1) + return (total); + if (ctxt->value == NULL) + return (total); + + oldnode = ctxt->context->node; + +#ifdef LIBXML_XPTR_ENABLED + /* + * Hum are we filtering the result of an XPointer expression + */ + if (ctxt->value->type == XPATH_LOCATIONSET) { + xmlLocationSetPtr newlocset = NULL; + xmlLocationSetPtr oldlocset; + + /* + * Extract the old locset, and then evaluate the result of the + * expression for all the element in the locset. use it to grow + * up a new locset. 
+ */ + CHECK_TYPE0(XPATH_LOCATIONSET); + obj = valuePop(ctxt); + oldlocset = obj->user; + ctxt->context->node = NULL; + + if ((oldlocset == NULL) || (oldlocset->locNr == 0)) { + ctxt->context->contextSize = 0; + ctxt->context->proximityPosition = 0; + if (op->ch2 != -1) + total += + xmlXPathCompOpEval(ctxt, + &comp->steps[op->ch2]); + res = valuePop(ctxt); + if (res != NULL) + xmlXPathFreeObject(res); + valuePush(ctxt, obj); + CHECK_ERROR0; + return (total); + } + newlocset = xmlXPtrLocationSetCreate(NULL); + + for (i = 0; i < oldlocset->locNr; i++) { + /* + * Run the evaluation with a node list made of a + * single item in the nodelocset. + */ + ctxt->context->node = oldlocset->locTab[i]->user; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + ctxt->context->contextSize = oldlocset->locNr; + ctxt->context->proximityPosition = i + 1; + + if (op->ch2 != -1) + total += + xmlXPathCompOpEval(ctxt, + &comp->steps[op->ch2]); + CHECK_ERROR0; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + if (xmlXPathEvaluatePredicateResult(ctxt, res)) { + xmlXPtrLocationSetAdd(newlocset, + xmlXPathObjectCopy + (oldlocset->locTab[i])); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation locset. + */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPtrWrapLocationSet(newlocset)); + ctxt->context->node = oldnode; + return (total); + } +#endif /* LIBXML_XPTR_ENABLED */ + + /* + * Extract the old set, and then evaluate the result of the + * expression for all the element in the set. use it to grow + * up a new set. 
+ */ + CHECK_TYPE0(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + + oldnode = ctxt->context->node; + ctxt->context->node = NULL; + + if ((oldset == NULL) || (oldset->nodeNr == 0)) { + ctxt->context->contextSize = 0; + ctxt->context->proximityPosition = 0; + if (op->ch2 != -1) + total += + xmlXPathCompOpEval(ctxt, + &comp->steps[op->ch2]); + CHECK_ERROR0; + res = valuePop(ctxt); + if (res != NULL) + xmlXPathFreeObject(res); + valuePush(ctxt, obj); + ctxt->context->node = oldnode; + CHECK_ERROR0; + } else { + /* + * Initialize the new set. + */ + newset = xmlXPathNodeSetCreate(NULL); + + for (i = 0; i < oldset->nodeNr; i++) { + /* + * Run the evaluation with a node list made of + * a single item in the nodeset. + */ + ctxt->context->node = oldset->nodeTab[i]; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + ctxt->context->contextSize = oldset->nodeNr; + ctxt->context->proximityPosition = i + 1; + + if (op->ch2 != -1) + total += + xmlXPathCompOpEval(ctxt, + &comp->steps[op->ch2]); + CHECK_ERROR0; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + if (xmlXPathEvaluatePredicateResult(ctxt, res)) { + xmlXPathNodeSetAdd(newset, oldset->nodeTab[i]); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation set. 
+ */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPathWrapNodeSet(newset)); + } + ctxt->context->node = oldnode; + return (total); + } + case XPATH_OP_SORT: + if (op->ch1 != -1) + total += xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + CHECK_ERROR0; + if ((ctxt->value != NULL) && + (ctxt->value->type == XPATH_NODESET) && + (ctxt->value->nodesetval != NULL)) + xmlXPathNodeSetSort(ctxt->value->nodesetval); + return (total); +#ifdef LIBXML_XPTR_ENABLED + case XPATH_OP_RANGETO:{ + xmlXPathObjectPtr range; + xmlXPathObjectPtr res, obj; + xmlXPathObjectPtr tmp; + xmlLocationSetPtr newset = NULL; + xmlNodeSetPtr oldset; + int i; + + if (op->ch1 != -1) + total += + xmlXPathCompOpEval(ctxt, &comp->steps[op->ch1]); + if (op->ch2 == -1) + return (total); + + CHECK_TYPE0(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + ctxt->context->node = NULL; + + newset = xmlXPtrLocationSetCreate(NULL); + + if (oldset != NULL) { + for (i = 0; i < oldset->nodeNr; i++) { + /* + * Run the evaluation with a node list made of a single item + * in the nodeset. + */ + ctxt->context->node = oldset->nodeTab[i]; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + + if (op->ch2 != -1) + total += + xmlXPathCompOpEval(ctxt, + &comp->steps[op->ch2]); + CHECK_ERROR0; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + range = + xmlXPtrNewRangeNodeObject(oldset->nodeTab[i], + res); + if (range != NULL) { + xmlXPtrLocationSetAdd(newset, range); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + } + + /* + * The result is used as the new evaluation set. 
+ */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + return (total); + } +#endif /* LIBXML_XPTR_ENABLED */ + } + xmlGenericError(xmlGenericErrorContext, + "XPath: unknown precompiled operation %d\n", op->op); + return (total); +} + +/** + * xmlXPathRunEval: + * @ctxt: the XPath parser context with the compiled expression + * + * Evaluate the Precompiled XPath expression in the given context. + */ +static void +xmlXPathRunEval(xmlXPathParserContextPtr ctxt) { + xmlXPathCompExprPtr comp; + + if ((ctxt == NULL) || (ctxt->comp == NULL)) + return; + + if (ctxt->valueTab == NULL) { + /* Allocate the value stack */ + ctxt->valueTab = (xmlXPathObjectPtr *) + xmlMalloc(10 * sizeof(xmlXPathObjectPtr)); + if (ctxt->valueTab == NULL) { + xmlFree(ctxt); + } + ctxt->valueNr = 0; + ctxt->valueMax = 10; + ctxt->value = NULL; + } + comp = ctxt->comp; + if(comp->last < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathRunEval: last is less than zero\n"); + return; + } + xmlXPathCompOpEval(ctxt, &comp->steps[comp->last]); +} + +/************************************************************************ + * * + * Public interfaces * + * * + ************************************************************************/ + +/** + * xmlXPathEvalPredicate: + * @ctxt: the XPath context + * @res: the Predicate Expression evaluation result + * + * Evaluate a predicate result for the current node. + * A PredicateExpr is evaluated by evaluating the Expr and converting + * the result to a boolean. If the result is a number, the result will + * be converted to true if the number is equal to the position of the + * context node in the context node list (as returned by the position + * function) and will be converted to false otherwise; if the result + * is not a number, then the result will be converted as if by a call + * to the boolean function. 
+ * + * Returns 1 if predicate is true, 0 otherwise + */ +int +xmlXPathEvalPredicate(xmlXPathContextPtr ctxt, xmlXPathObjectPtr res) { + if (res == NULL) return(0); + switch (res->type) { + case XPATH_BOOLEAN: + return(res->boolval); + case XPATH_NUMBER: + return(res->floatval == ctxt->proximityPosition); + case XPATH_NODESET: + case XPATH_XSLT_TREE: + if (res->nodesetval == NULL) + return(0); + return(res->nodesetval->nodeNr != 0); + case XPATH_STRING: + return((res->stringval != NULL) && + (xmlStrlen(res->stringval) != 0)); + default: + STRANGE + } + return(0); +} + +/** + * xmlXPathEvaluatePredicateResult: + * @ctxt: the XPath Parser context + * @res: the Predicate Expression evaluation result + * + * Evaluate a predicate result for the current node. + * A PredicateExpr is evaluated by evaluating the Expr and converting + * the result to a boolean. If the result is a number, the result will + * be converted to true if the number is equal to the position of the + * context node in the context node list (as returned by the position + * function) and will be converted to false otherwise; if the result + * is not a number, then the result will be converted as if by a call + * to the boolean function. + * + * Returns 1 if predicate is true, 0 otherwise + */ +int +xmlXPathEvaluatePredicateResult(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr res) { + if (res == NULL) return(0); + switch (res->type) { + case XPATH_BOOLEAN: + return(res->boolval); + case XPATH_NUMBER: + return(res->floatval == ctxt->context->proximityPosition); + case XPATH_NODESET: + case XPATH_XSLT_TREE: + if (res->nodesetval == NULL) + return(0); + return(res->nodesetval->nodeNr != 0); + case XPATH_STRING: + return((res->stringval != NULL) && + (xmlStrlen(res->stringval) != 0)); + default: + STRANGE + } + return(0); +} + +/** + * xmlXPathCompile: + * @str: the XPath expression + * + * Compile an XPath expression + * + * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL. 
+ * the caller has to free the object. + */ +xmlXPathCompExprPtr +xmlXPathCompile(const xmlChar *str) { + xmlXPathParserContextPtr ctxt; + xmlXPathCompExprPtr comp; + + xmlXPathInit(); + + ctxt = xmlXPathNewParserContext(str, NULL); + xmlXPathCompileExpr(ctxt); + + if (*ctxt->cur != 0) { + /* + * aleksey: in some cases this line prints *second* error message + * (see bug #78858) and probably this should be fixed. + * However, we are not sure that all error messages are printed + * out in other places. It's not critical so we leave it as-is for now + */ + xmlXPatherror(ctxt, __FILE__, __LINE__, XPATH_EXPR_ERROR); + comp = NULL; + } else { + comp = ctxt->comp; + ctxt->comp = NULL; + } + xmlXPathFreeParserContext(ctxt); + if (comp != NULL) { + comp->expr = xmlStrdup(str); +#ifdef DEBUG_EVAL_COUNTS + comp->string = xmlStrdup(str); + comp->nb = 0; +#endif + } + return(comp); +} + +/** + * xmlXPathCompiledEval: + * @comp: the compiled XPath expression + * @ctx: the XPath context + * + * Evaluate the Precompiled XPath expression in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL. + * the caller has to free the object. 
+ */ +xmlXPathObjectPtr +xmlXPathCompiledEval(xmlXPathCompExprPtr comp, xmlXPathContextPtr ctx) { + xmlXPathParserContextPtr ctxt; + xmlXPathObjectPtr res, tmp, init = NULL; + int stack = 0; +#ifndef LIBXML_THREAD_ENABLED + static int reentance = 0; +#endif + + if ((comp == NULL) || (ctx == NULL)) + return(NULL); + xmlXPathInit(); + + CHECK_CONTEXT(ctx) + +#ifndef LIBXML_THREAD_ENABLED + reentance++; + if (reentance > 1) + xmlXPathDisableOptimizer = 1; +#endif + +#ifdef DEBUG_EVAL_COUNTS + comp->nb++; + if ((comp->string != NULL) && (comp->nb > 100)) { + fprintf(stderr, "100 x %s\n", comp->string); + comp->nb = 0; + } +#endif + ctxt = xmlXPathCompParserContext(comp, ctx); + xmlXPathRunEval(ctxt); + + if (ctxt->value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompiledEval: evaluation failed\n"); + res = NULL; + } else { + res = valuePop(ctxt); + } + + + do { + tmp = valuePop(ctxt); + if (tmp != NULL) { + if (tmp != init) + stack++; + xmlXPathFreeObject(tmp); + } + } while (tmp != NULL); + if ((stack != 0) && (res != NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathCompiledEval: %d object left on the stack\n", + stack); + } + if (ctxt->error != XPATH_EXPRESSION_OK) { + xmlXPathFreeObject(res); + res = NULL; + } + + + ctxt->comp = NULL; + xmlXPathFreeParserContext(ctxt); +#ifndef LIBXML_THREAD_ENABLED + reentance--; +#endif + return(res); +} + +/** + * xmlXPathEvalExpr: + * @ctxt: the XPath Parser context + * + * Parse and evaluate an XPath expression in the given context, + * then push the result on the context stack + */ +void +xmlXPathEvalExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathCompileExpr(ctxt); + CHECK_ERROR; + xmlXPathRunEval(ctxt); +} + +/** + * xmlXPathEval: + * @str: the XPath expression + * @ctx: the XPath context + * + * Evaluate the XPath Location Path in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL. + * the caller has to free the object. 
+ */ +xmlXPathObjectPtr +xmlXPathEval(const xmlChar *str, xmlXPathContextPtr ctx) { + xmlXPathParserContextPtr ctxt; + xmlXPathObjectPtr res, tmp, init = NULL; + int stack = 0; + + xmlXPathInit(); + + CHECK_CONTEXT(ctx) + + ctxt = xmlXPathNewParserContext(str, ctx); + xmlXPathEvalExpr(ctxt); + + if (ctxt->value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEval: evaluation failed\n"); + res = NULL; + } else if (*ctxt->cur != 0) { + xmlXPatherror(ctxt, __FILE__, __LINE__, XPATH_EXPR_ERROR); + res = NULL; + } else { + res = valuePop(ctxt); + } + + do { + tmp = valuePop(ctxt); + if (tmp != NULL) { + if (tmp != init) + stack++; + xmlXPathFreeObject(tmp); + } + } while (tmp != NULL); + if ((stack != 0) && (res != NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEval: %d object left on the stack\n", + stack); + } + if (ctxt->error != XPATH_EXPRESSION_OK) { + xmlXPathFreeObject(res); + res = NULL; + } + + xmlXPathFreeParserContext(ctxt); + return(res); +} + +/** + * xmlXPathEvalExpression: + * @str: the XPath expression + * @ctxt: the XPath context + * + * Evaluate the XPath expression in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL. + * the caller has to free the object. 
+ */ +xmlXPathObjectPtr +xmlXPathEvalExpression(const xmlChar *str, xmlXPathContextPtr ctxt) { + xmlXPathParserContextPtr pctxt; + xmlXPathObjectPtr res, tmp; + int stack = 0; + + xmlXPathInit(); + + CHECK_CONTEXT(ctxt) + + pctxt = xmlXPathNewParserContext(str, ctxt); + xmlXPathEvalExpr(pctxt); + + if (*pctxt->cur != 0) { + xmlXPatherror(pctxt, __FILE__, __LINE__, XPATH_EXPR_ERROR); + res = NULL; + } else { + res = valuePop(pctxt); + } + do { + tmp = valuePop(pctxt); + if (tmp != NULL) { + xmlXPathFreeObject(tmp); + stack++; + } + } while (tmp != NULL); + if ((stack != 0) && (res != NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEvalExpression: %d object left on the stack\n", + stack); + } + xmlXPathFreeParserContext(pctxt); + return(res); +} + +/************************************************************************ + * * + * Extra functions not pertaining to the XPath spec * + * * + ************************************************************************/ +/** + * xmlXPathEscapeUriFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the escape-uri() XPath function + * string escape-uri(string $str, bool $escape-reserved) + * + * This function applies the URI escaping rules defined in section 2 of [RFC + * 2396] to the string supplied as $uri-part, which typically represents all + * or part of a URI. The effect of the function is to replace any special + * character in the string by an escape sequence of the form %xx%yy..., + * where xxyy... is the hexadecimal representation of the octets used to + * represent the character in UTF-8. + * + * The set of characters that are escaped depends on the setting of the + * boolean argument $escape-reserved. + * + * If $escape-reserved is true, all characters are escaped other than lower + * case letters a-z, upper case letters A-Z, digits 0-9, and the characters + * referred to in [RFC 2396] as "marks": specifically, "-" | "_" | "." | "!" 
+ * | "~" | "*" | "'" | "(" | ")". The "%" character itself is escaped only + * if it is not followed by two hexadecimal digits (that is, 0-9, a-f, and + * A-F). + * + * If $escape-reserved is false, the behavior differs in that characters + * referred to in [RFC 2396] as reserved characters are not escaped. These + * characters are ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ",". + * + * [RFC 2396] does not define whether escaped URIs should use lower case or + * upper case for hexadecimal digits. To ensure that escaped URIs can be + * compared using string comparison functions, this function must always use + * the upper-case letters A-F. + * + * Generally, $escape-reserved should be set to true when escaping a string + * that is to form a single part of a URI, and to false when escaping an + * entire URI or URI reference. + * + * In the case of non-ascii characters, the string is encoded according to + * utf-8 and then converted according to RFC 2396. + * + * Examples + * xf:escape-uri ("gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles#ocean"), true()) + * returns "gopher%3A%2F%2Fspinaltap.micro.umn.edu%2F00%2FWeather%2FCalifornia%2FLos%20Angeles%23ocean" + * xf:escape-uri ("gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles#ocean"), false()) + * returns "gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles%23ocean" + * + */ +static void +xmlXPathEscapeUriFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr str; + int escape_reserved; + xmlBufferPtr target; + xmlChar *cptr; + xmlChar escape[4]; + + CHECK_ARITY(2); + + escape_reserved = xmlXPathPopBoolean(ctxt); + + CAST_TO_STRING; + str = valuePop(ctxt); + + target = xmlBufferCreate(); + + escape[0] = '%'; + escape[3] = 0; + + if (target) { + for (cptr = str->stringval; *cptr; cptr++) { + if ((*cptr >= 'A' && *cptr <= 'Z') || + (*cptr >= 'a' && *cptr <= 'z') || + (*cptr >= '0' && *cptr <= '9') || + *cptr == '-' || *cptr == '_' || 
*cptr == '.' || + *cptr == '!' || *cptr == '~' || *cptr == '*' || + *cptr == '\''|| *cptr == '(' || *cptr == ')' || + (*cptr == '%' && + ((cptr[1] >= 'A' && cptr[1] <= 'F') || + (cptr[1] >= 'a' && cptr[1] <= 'f') || + (cptr[1] >= '0' && cptr[1] <= '9')) && + ((cptr[2] >= 'A' && cptr[2] <= 'F') || + (cptr[2] >= 'a' && cptr[2] <= 'f') || + (cptr[2] >= '0' && cptr[2] <= '9'))) || + (!escape_reserved && + (*cptr == ';' || *cptr == '/' || *cptr == '?' || + *cptr == ':' || *cptr == '@' || *cptr == '&' || + *cptr == '=' || *cptr == '+' || *cptr == '$' || + *cptr == ','))) { + xmlBufferAdd(target, cptr, 1); + } else { + if ((*cptr >> 4) < 10) + escape[1] = '0' + (*cptr >> 4); + else + escape[1] = 'A' - 10 + (*cptr >> 4); + if ((*cptr & 0xF) < 10) + escape[2] = '0' + (*cptr & 0xF); + else + escape[2] = 'A' - 10 + (*cptr & 0xF); + + xmlBufferAdd(target, &escape[0], 3); + } + } + } + valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target))); + xmlBufferFree(target); + xmlXPathFreeObject(str); +} + +/** + * xmlXPathRegisterAllFunctions: + * @ctxt: the XPath context + * + * Registers all default XPath functions in this context + */ +void +xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt) +{ + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean", + xmlXPathBooleanFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling", + xmlXPathCeilingFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count", + xmlXPathCountFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat", + xmlXPathConcatFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains", + xmlXPathContainsFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id", + xmlXPathIdFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false", + xmlXPathFalseFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor", + xmlXPathFloorFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last", + xmlXPathLastFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang", + 
xmlXPathLangFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"local-name", + xmlXPathLocalNameFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not", + xmlXPathNotFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name", + xmlXPathNameFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri", + xmlXPathNamespaceURIFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space", + xmlXPathNormalizeFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number", + xmlXPathNumberFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position", + xmlXPathPositionFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round", + xmlXPathRoundFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string", + xmlXPathStringFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length", + xmlXPathStringLengthFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with", + xmlXPathStartsWithFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring", + xmlXPathSubstringFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before", + xmlXPathSubstringBeforeFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after", + xmlXPathSubstringAfterFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum", + xmlXPathSumFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true", + xmlXPathTrueFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate", + xmlXPathTranslateFunction); + + xmlXPathRegisterFuncNS(ctxt, (const xmlChar *)"escape-uri", + (const xmlChar *)"http://www.w3.org/2002/08/xquery-functions", + xmlXPathEscapeUriFunction); +} + +#endif /* LIBXML_XPATH_ENABLED */ diff --git a/bundle/libxml/xpointer.c b/bundle/libxml/xpointer.c new file mode 100644 index 0000000000..4c4cf991c5 --- /dev/null +++ b/bundle/libxml/xpointer.c @@ -0,0 +1,2902 @@ +/* + * xpointer.c : Code to handle XML Pointer + * + * World Wide Web Consortium Working Draft 
03-March-1998 + * http://www.w3.org/TR/2000/CR-xptr-20000607 + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +/* + * TODO: better handling of error cases, the full expression should + * be parsed beforehand instead of a progressive evaluation + * TODO: Access into entities references are not supported now ... + * need a start to be able to pop out of entities refs since + * parent is the endity declaration, not the ref. + */ + +#include <string.h> +#include <libxml/xpointer.h> +#include <libxml/xmlmemory.h> +#include <libxml/parserInternals.h> +#include <libxml/uri.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include <libxml/xmlerror.h> +#include <libxml/globals.h> + +#ifdef LIBXML_XPTR_ENABLED + +/* Add support of the xmlns() xpointer scheme to initialize the namespaces */ +#define XPTR_XMLNS_SCHEME + +/* #define DEBUG_RANGES */ +#ifdef DEBUG_RANGES +#ifdef LIBXML_DEBUG_ENABLED +#include <libxml/debugXML.h> +#endif +#endif + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define STRANGE \ + xmlGenericError(xmlGenericErrorContext, \ + "Internal error at %s:%d\n", \ + __FILE__, __LINE__); + +/************************************************************************ + * * + * A few helper functions for child sequences * + * * + ************************************************************************/ + +xmlNodePtr xmlXPtrAdvanceNode(xmlNodePtr cur); +/** + * xmlXPtrGetArity: + * @cur: the node + * + * Returns the number of child for an element, -1 in case of error + */ +static int +xmlXPtrGetArity(xmlNodePtr cur) { + int i; + if (cur == NULL) + return(-1); + cur = cur->children; + for (i = 0;cur != NULL;cur = cur->next) { + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + i++; + } + } + return(i); +} + +/** + * 
xmlXPtrGetIndex: + * @cur: the node + * + * Returns the index of the node in its parent children list, -1 + * in case of error + */ +static int +xmlXPtrGetIndex(xmlNodePtr cur) { + int i; + if (cur == NULL) + return(-1); + for (i = 1;cur != NULL;cur = cur->prev) { + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + i++; + } + } + return(i); +} + +/** + * xmlXPtrGetNthChild: + * @cur: the node + * @no: the child number + * + * Returns the @no'th element child of @cur or NULL + */ +static xmlNodePtr +xmlXPtrGetNthChild(xmlNodePtr cur, int no) { + int i; + if (cur == NULL) + return(cur); + cur = cur->children; + for (i = 0;i <= no;cur = cur->next) { + if (cur == NULL) + return(cur); + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + i++; + if (i == no) + break; + } + } + return(cur); +} + +/************************************************************************ + * * + * Handling of XPointer specific types * + * * + ************************************************************************/ + +/** + * xmlXPtrCmpPoints: + * @node1: the first node + * @index1: the first index + * @node2: the second node + * @index2: the second index + * + * Compare two points w.r.t document order + * + * Returns -2 in case of error 1 if first point < second point, 0 if + * that's the same point, -1 otherwise + */ +static int +xmlXPtrCmpPoints(xmlNodePtr node1, int index1, xmlNodePtr node2, int index2) { + if ((node1 == NULL) || (node2 == NULL)) + return(-2); + /* + * a couple of optimizations which will avoid computations in most cases + */ + if (node1 == node2) { + if (index1 < index2) + return(1); + if (index1 > index2) + return(-1); + return(0); + } + return(xmlXPathCmpNodes(node1, node2)); +} + +/** + * xmlXPtrNewPoint: + * @node: the xmlNodePtr + * @indx: the indx within the node + * + * Create a new xmlXPathObjectPtr of type point + * + * 
Returns the newly created object. + */ +static xmlXPathObjectPtr +xmlXPtrNewPoint(xmlNodePtr node, int indx) { + xmlXPathObjectPtr ret; + + if (node == NULL) + return(NULL); + if (indx < 0) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewPoint: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_POINT; + ret->user = (void *) node; + ret->index = indx; + return(ret); +} + +/** + * xmlXPtrRangeCheckOrder: + * @range: an object range + * + * Make sure the points in the range are in the right order + */ +static void +xmlXPtrRangeCheckOrder(xmlXPathObjectPtr range) { + int tmp; + xmlNodePtr tmp2; + if (range == NULL) + return; + if (range->type != XPATH_RANGE) + return; + if (range->user2 == NULL) + return; + tmp = xmlXPtrCmpPoints(range->user, range->index, + range->user2, range->index2); + if (tmp == -1) { + tmp2 = range->user; + range->user = range->user2; + range->user2 = tmp2; + tmp = range->index; + range->index = range->index2; + range->index2 = tmp; + } +} + +/** + * xmlXPtrRangesEqual: + * @range1: the first range + * @range2: the second range + * + * Compare two ranges + * + * Returns 1 if equal, 0 otherwise + */ +static int +xmlXPtrRangesEqual(xmlXPathObjectPtr range1, xmlXPathObjectPtr range2) { + if (range1 == range2) + return(1); + if ((range1 == NULL) || (range2 == NULL)) + return(0); + if (range1->type != range2->type) + return(0); + if (range1->type != XPATH_RANGE) + return(0); + if (range1->user != range2->user) + return(0); + if (range1->index != range2->index) + return(0); + if (range1->user2 != range2->user2) + return(0); + if (range1->index2 != range2->index2) + return(0); + return(1); +} + +/** + * xmlXPtrNewRange: + * @start: the starting node + * @startindex: the start index + * @end: the ending point + * @endindex: the ending index + * + * Create a new xmlXPathObjectPtr of type 
range + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRange(xmlNodePtr start, int startindex, + xmlNodePtr end, int endindex) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (startindex < 0) + return(NULL); + if (endindex < 0) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRange: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = startindex; + ret->user2 = end; + ret->index2 = endindex; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangePoints: + * @start: the starting point + * @end: the ending point + * + * Create a new xmlXPathObjectPtr of type range using 2 Points + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRangePoints(xmlXPathObjectPtr start, xmlXPathObjectPtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (start->type != XPATH_POINT) + return(NULL); + if (end->type != XPATH_POINT) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangePoints: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start->user; + ret->index = start->index; + ret->user2 = end->user; + ret->index2 = end->index; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangePointNode: + * @start: the starting point + * @end: the ending node + * + * Create a new xmlXPathObjectPtr of type range from a point to a node + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrNewRangePointNode(xmlXPathObjectPtr start, xmlNodePtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (start->type != XPATH_POINT) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangePointNode: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start->user; + ret->index = start->index; + ret->user2 = end; + ret->index2 = -1; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangeNodePoint: + * @start: the starting node + * @end: the ending point + * + * Create a new xmlXPathObjectPtr of type range from a node to a point + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRangeNodePoint(xmlNodePtr start, xmlXPathObjectPtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (start->type != XPATH_POINT) + return(NULL); + if (end->type != XPATH_POINT) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodePoint: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + ret->user2 = end->user; + ret->index2 = end->index; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangeNodes: + * @start: the starting node + * @end: the ending node + * + * Create a new xmlXPathObjectPtr of type range using 2 nodes + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrNewRangeNodes(xmlNodePtr start, xmlNodePtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodes: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + ret->user2 = end; + ret->index2 = -1; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewCollapsedRange: + * @start: the starting and ending node + * + * Create a new xmlXPathObjectPtr of type range using a single nodes + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewCollapsedRange(xmlNodePtr start) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewCollapsedRange: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + ret->user2 = NULL; + ret->index2 = -1; + return(ret); +} + +/** + * xmlXPtrNewRangeNodeObject: + * @start: the starting node + * @end: the ending object + * + * Create a new xmlXPathObjectPtr of type range from a not to an object + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRangeNodeObject(xmlNodePtr start, xmlXPathObjectPtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + switch (end->type) { + case XPATH_POINT: + break; + case XPATH_NODESET: + /* + * Empty set ... 
+ */ + if (end->nodesetval->nodeNr <= 0) + return(NULL); + break; + default: + TODO + return(NULL); + } + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodeObject: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + switch (end->type) { + case XPATH_POINT: + ret->user2 = end->user; + ret->index2 = end->index; + case XPATH_NODESET: { + ret->user2 = end->nodesetval->nodeTab[end->nodesetval->nodeNr - 1]; + ret->index2 = -1; + break; + } + default: + STRANGE + return(NULL); + } + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +#define XML_RANGESET_DEFAULT 10 + +/** + * xmlXPtrLocationSetCreate: + * @val: an initial xmlXPathObjectPtr, or NULL + * + * Create a new xmlLocationSetPtr of type double and of value @val + * + * Returns the newly created object. + */ +xmlLocationSetPtr +xmlXPtrLocationSetCreate(xmlXPathObjectPtr val) { + xmlLocationSetPtr ret; + + ret = (xmlLocationSetPtr) xmlMalloc(sizeof(xmlLocationSet)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetCreate: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlLocationSet)); + if (val != NULL) { + ret->locTab = (xmlXPathObjectPtr *) xmlMalloc(XML_RANGESET_DEFAULT * + sizeof(xmlXPathObjectPtr)); + if (ret->locTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetCreate: out of memory\n"); + return(NULL); + } + memset(ret->locTab, 0 , + XML_RANGESET_DEFAULT * (size_t) sizeof(xmlXPathObjectPtr)); + ret->locMax = XML_RANGESET_DEFAULT; + ret->locTab[ret->locNr++] = val; + } + return(ret); +} + +/** + * xmlXPtrLocationSetAdd: + * @cur: the initial range set + * @val: a new xmlXPathObjectPtr + * + * add a new xmlXPathObjectPtr to an existing LocationSet + * If the location already exist in the set @val is freed. 
+ */ +void +xmlXPtrLocationSetAdd(xmlLocationSetPtr cur, xmlXPathObjectPtr val) { + int i; + + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->locNr;i++) { + if (xmlXPtrRangesEqual(cur->locTab[i], val)) { + xmlXPathFreeObject(val); + return; + } + } + + /* + * grow the locTab if needed + */ + if (cur->locMax == 0) { + cur->locTab = (xmlXPathObjectPtr *) xmlMalloc(XML_RANGESET_DEFAULT * + sizeof(xmlXPathObjectPtr)); + if (cur->locTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetAdd: out of memory\n"); + return; + } + memset(cur->locTab, 0 , + XML_RANGESET_DEFAULT * (size_t) sizeof(xmlXPathObjectPtr)); + cur->locMax = XML_RANGESET_DEFAULT; + } else if (cur->locNr == cur->locMax) { + xmlXPathObjectPtr *temp; + + cur->locMax *= 2; + temp = (xmlXPathObjectPtr *) xmlRealloc(cur->locTab, cur->locMax * + sizeof(xmlXPathObjectPtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetAdd: out of memory\n"); + return; + } + cur->locTab = temp; + } + cur->locTab[cur->locNr++] = val; +} + +/** + * xmlXPtrLocationSetMerge: + * @val1: the first LocationSet + * @val2: the second LocationSet + * + * Merges two rangesets, all ranges from @val2 are added to @val1 + * + * Returns val1 once extended or NULL in case of error. + */ +xmlLocationSetPtr +xmlXPtrLocationSetMerge(xmlLocationSetPtr val1, xmlLocationSetPtr val2) { + int i; + + if (val1 == NULL) return(NULL); + if (val2 == NULL) return(val1); + + /* + * !!!!! this can be optimized a lot, knowing that both + * val1 and val2 already have unicity of their values. 
+ */ + + for (i = 0;i < val2->locNr;i++) + xmlXPtrLocationSetAdd(val1, val2->locTab[i]); + + return(val1); +} + +/** + * xmlXPtrLocationSetDel: + * @cur: the initial range set + * @val: an xmlXPathObjectPtr + * + * Removes an xmlXPathObjectPtr from an existing LocationSet + */ +void +xmlXPtrLocationSetDel(xmlLocationSetPtr cur, xmlXPathObjectPtr val) { + int i; + + if (cur == NULL) return; + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->locNr;i++) + if (cur->locTab[i] == val) break; + + if (i >= cur->locNr) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetDel: Range wasn't found in RangeList\n"); +#endif + return; + } + cur->locNr--; + for (;i < cur->locNr;i++) + cur->locTab[i] = cur->locTab[i + 1]; + cur->locTab[cur->locNr] = NULL; +} + +/** + * xmlXPtrLocationSetRemove: + * @cur: the initial range set + * @val: the index to remove + * + * Removes an entry from an existing LocationSet list. + */ +void +xmlXPtrLocationSetRemove(xmlLocationSetPtr cur, int val) { + if (cur == NULL) return; + if (val >= cur->locNr) return; + cur->locNr--; + for (;val < cur->locNr;val++) + cur->locTab[val] = cur->locTab[val + 1]; + cur->locTab[cur->locNr] = NULL; +} + +/** + * xmlXPtrFreeLocationSet: + * @obj: the xmlLocationSetPtr to free + * + * Free the LocationSet compound (not the actual ranges !). + */ +void +xmlXPtrFreeLocationSet(xmlLocationSetPtr obj) { + int i; + + if (obj == NULL) return; + if (obj->locTab != NULL) { + for (i = 0;i < obj->locNr; i++) { + xmlXPathFreeObject(obj->locTab[i]); + } + xmlFree(obj->locTab); + } + xmlFree(obj); +} + +/** + * xmlXPtrNewLocationSetNodes: + * @start: the start NodePtr value + * @end: the end NodePtr value or NULL + * + * Create a new xmlXPathObjectPtr of type LocationSet and initialize + * it with the single range made of the two nodes @start and @end + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrNewLocationSetNodes(xmlNodePtr start, xmlNodePtr end) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewLocationSetNodes: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_LOCATIONSET; + if (end == NULL) + ret->user = xmlXPtrLocationSetCreate(xmlXPtrNewCollapsedRange(start)); + else + ret->user = xmlXPtrLocationSetCreate(xmlXPtrNewRangeNodes(start,end)); + return(ret); +} + +/** + * xmlXPtrNewLocationSetNodeSet: + * @set: a node set + * + * Create a new xmlXPathObjectPtr of type LocationSet and initialize + * it with all the nodes from @set + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewLocationSetNodeSet(xmlNodeSetPtr set) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewLocationSetNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_LOCATIONSET; + if (set != NULL) { + int i; + xmlLocationSetPtr newset; + + newset = xmlXPtrLocationSetCreate(NULL); + if (newset == NULL) + return(ret); + + for (i = 0;i < set->nodeNr;i++) + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewCollapsedRange(set->nodeTab[i])); + + ret->user = (void *) newset; + } + return(ret); +} + +/** + * xmlXPtrWrapLocationSet: + * @val: the LocationSet value + * + * Wrap the LocationSet @val in a new xmlXPathObjectPtr + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrWrapLocationSet(xmlLocationSetPtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrWrapLocationSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_LOCATIONSET; + ret->user = (void *) val; + return(ret); +} + +/************************************************************************ + * * + * The parser * + * * + ************************************************************************/ + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one need to make assumption on the context to use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * CUR returns the current xmlChar value, i.e. a 8 bit value + * in ISO-Latin or UTF-8. + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * CURRENT Returns the current char value, with the full decoding of + * UTF-8 if we are using this mode. It returns an int. + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. + * It returns the pointer to the current xmlChar. + */ + +#define CUR (*ctxt->cur) +#define SKIP(val) ctxt->cur += (val) +#define NXT(val) ctxt->cur[(val)] +#define CUR_PTR ctxt->cur + +#define SKIP_BLANKS \ + while (IS_BLANK(*(ctxt->cur))) NEXT + +#define CURRENT (*ctxt->cur) +#define NEXT ((*ctxt->cur) ? 
ctxt->cur++: ctxt->cur) + +/* + * xmlXPtrGetChildNo: + * @ctxt: the XPointer Parser context + * @index: the child number + * + * Move the current node of the nodeset on the stack to the + * given child if found + */ +static void +xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt, int indx) { + xmlNodePtr cur = NULL; + xmlXPathObjectPtr obj; + xmlNodeSetPtr oldset; + + CHECK_TYPE(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + if ((indx <= 0) || (oldset == NULL) || (oldset->nodeNr != 1)) { + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + return; + } + cur = xmlXPtrGetNthChild(oldset->nodeTab[0], indx); + if (cur == NULL) { + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + return; + } + oldset->nodeTab[0] = cur; + valuePush(ctxt, obj); +} + +/** + * xmlXPtrEvalXPtrPart: + * @ctxt: the XPointer Parser context + * @name: the preparsed Scheme for the XPtrPart + * + * XPtrPart ::= 'xpointer' '(' XPtrExpr ')' + * | Scheme '(' SchemeSpecificExpr ')' + * + * Scheme ::= NCName - 'xpointer' [VC: Non-XPointer schemes] + * + * SchemeSpecificExpr ::= StringWithBalancedParens + * + * StringWithBalancedParens ::= + * [^()]* ('(' StringWithBalancedParens ')' [^()]*)* + * [VC: Parenthesis escaping] + * + * XPtrExpr ::= Expr [VC: Parenthesis escaping] + * + * VC: Parenthesis escaping: + * The end of an XPointer part is signaled by the right parenthesis ")" + * character that is balanced with the left parenthesis "(" character + * that began the part. Any unbalanced parenthesis character inside the + * expression, even within literals, must be escaped with a circumflex (^) + * character preceding it. If the expression contains any literal + * occurrences of the circumflex, each must be escaped with an additional + * circumflex (that is, ^^). If the unescaped parentheses in the expression + * are not balanced, a syntax error results. + * + * Parse and evaluate an XPtrPart. 
Basically it generates the unescaped + * string and if the scheme is 'xpointer' it will call the XPath interpreter. + * + * TODO: there is no new scheme registration mechanism + */ + +static void +xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt, xmlChar *name) { + xmlChar *buffer, *cur; + int len; + int level; + + if (name == NULL) + name = xmlXPathParseName(ctxt); + if (name == NULL) + XP_ERROR(XPATH_EXPR_ERROR); + + if (CUR != '(') + XP_ERROR(XPATH_EXPR_ERROR); + NEXT; + level = 1; + + len = xmlStrlen(ctxt->cur); + len++; + buffer = (xmlChar *) xmlMalloc(len * sizeof (xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrEvalXPtrPart: out of memory\n"); + return; + } + + cur = buffer; + while (CUR != 0) { + if (CUR == ')') { + level--; + if (level == 0) { + NEXT; + break; + } + *cur++ = CUR; + } else if (CUR == '(') { + level++; + *cur++ = CUR; + } else if (CUR == '^') { + NEXT; + if ((CUR == ')') || (CUR == '(') || (CUR == '^')) { + *cur++ = CUR; + } else { + *cur++ = '^'; + *cur++ = CUR; + } + } else { + *cur++ = CUR; + } + NEXT; + } + *cur = 0; + + if ((level != 0) && (CUR == 0)) { + xmlFree(buffer); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + + if (xmlStrEqual(name, (xmlChar *) "xpointer")) { + const xmlChar *left = CUR_PTR; + + CUR_PTR = buffer; + xmlXPathEvalExpr(ctxt); + CUR_PTR=left; +#ifdef XPTR_XMLNS_SCHEME + } else if (xmlStrEqual(name, (xmlChar *) "xmlns")) { + const xmlChar *left = CUR_PTR; + xmlChar *prefix; + xmlChar *URI; + xmlURIPtr value; + + CUR_PTR = buffer; + prefix = xmlXPathParseNCName(ctxt); + if (prefix == NULL) { + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + SKIP_BLANKS; + if (CUR != '=') { + xmlFree(prefix); + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + NEXT; + SKIP_BLANKS; + /* @@ check escaping in the XPointer WD */ + + value = xmlParseURI((const char *)ctxt->cur); + if (value == NULL) { + xmlFree(prefix); + xmlFree(buffer); + xmlFree(name); + 
XP_ERROR(XPTR_SYNTAX_ERROR); + } + URI = xmlSaveUri(value); + xmlFreeURI(value); + if (URI == NULL) { + xmlFree(prefix); + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPATH_MEMORY_ERROR); + } + + xmlXPathRegisterNs(ctxt->context, prefix, URI); + CUR_PTR = left; + xmlFree(URI); + xmlFree(prefix); +#endif /* XPTR_XMLNS_SCHEME */ + } else { + xmlGenericError(xmlGenericErrorContext, + "unsupported scheme '%s'\n", name); + } + xmlFree(buffer); + xmlFree(name); +} + +/** + * xmlXPtrEvalFullXPtr: + * @ctxt: the XPointer Parser context + * @name: the preparsed Scheme for the first XPtrPart + * + * FullXPtr ::= XPtrPart (S? XPtrPart)* + * + * As the specs says: + * ----------- + * When multiple XPtrParts are provided, they must be evaluated in + * left-to-right order. If evaluation of one part fails, the nexti + * is evaluated. The following conditions cause XPointer part failure: + * + * - An unknown scheme + * - A scheme that does not locate any sub-resource present in the resource + * - A scheme that is not applicable to the media type of the resource + * + * The XPointer application must consume a failed XPointer part and + * attempt to evaluate the next one, if any. The result of the first + * XPointer part whose evaluation succeeds is taken to be the fragment + * located by the XPointer as a whole. If all the parts fail, the result + * for the XPointer as a whole is a sub-resource error. + * ----------- + * + * Parse and evaluate a Full XPtr i.e. possibly a cascade of XPath based + * expressions or other schemes. + */ +static void +xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt, xmlChar *name) { + if (name == NULL) + name = xmlXPathParseName(ctxt); + if (name == NULL) + XP_ERROR(XPATH_EXPR_ERROR); + while (name != NULL) { + xmlXPtrEvalXPtrPart(ctxt, name); + + /* in case of syntax error, break here */ + if (ctxt->error != XPATH_EXPRESSION_OK) + return; + + /* + * If the returned value is a non-empty nodeset + * or location set, return here. 
+ */ + if (ctxt->value != NULL) { + xmlXPathObjectPtr obj = ctxt->value; + + switch (obj->type) { + case XPATH_LOCATIONSET: { + xmlLocationSetPtr loc = ctxt->value->user; + if ((loc != NULL) && (loc->locNr > 0)) + return; + break; + } + case XPATH_NODESET: { + xmlNodeSetPtr loc = ctxt->value->nodesetval; + if ((loc != NULL) && (loc->nodeNr > 0)) + return; + break; + } + default: + break; + } + + /* + * Evaluating to improper values is equivalent to + * a sub-resource error, clean-up the stack + */ + do { + obj = valuePop(ctxt); + if (obj != NULL) { + xmlXPathFreeObject(obj); + } + } while (obj != NULL); + } + + /* + * Is there another XPointer part. + */ + SKIP_BLANKS; + name = xmlXPathParseName(ctxt); + } +} + +/** + * xmlXPtrEvalChildSeq: + * @ctxt: the XPointer Parser context + * @name: a possible ID name of the child sequence + * + * ChildSeq ::= '/1' ('/' [0-9]*)* + * | Name ('/' [0-9]*)+ + * + * Parse and evaluate a Child Sequence. This routine also handle the + * case of a Bare Name used to get a document ID. + */ +static void +xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name) { + /* + * XPointer don't allow by syntax to address in mutirooted trees + * this might prove useful in some cases, warn about it. 
+ */ + if ((name == NULL) && (CUR == '/') && (NXT(1) != '1')) { + xmlGenericError(xmlGenericErrorContext, + "warning: ChildSeq not starting by /1\n"); + } + + if (name != NULL) { + valuePush(ctxt, xmlXPathNewString(name)); + xmlFree(name); + xmlXPathIdFunction(ctxt, 1); + CHECK_ERROR; + } + + while (CUR == '/') { + int child = 0; + NEXT; + + while ((CUR >= '0') && (CUR <= '9')) { + child = child * 10 + (CUR - '0'); + NEXT; + } + xmlXPtrGetChildNo(ctxt, child); + } +} + + +/** + * xmlXPtrEvalXPointer: + * @ctxt: the XPointer Parser context + * + * XPointer ::= Name + * | ChildSeq + * | FullXPtr + * + * Parse and evaluate an XPointer + */ +static void +xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt) { + if (ctxt->valueTab == NULL) { + /* Allocate the value stack */ + ctxt->valueTab = (xmlXPathObjectPtr *) + xmlMalloc(10 * sizeof(xmlXPathObjectPtr)); + if (ctxt->valueTab == NULL) { + xmlFree(ctxt); + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEvalXPointer: out of memory\n"); + return; + } + ctxt->valueNr = 0; + ctxt->valueMax = 10; + ctxt->value = NULL; + } + SKIP_BLANKS; + if (CUR == '/') { + xmlXPathRoot(ctxt); + xmlXPtrEvalChildSeq(ctxt, NULL); + } else { + xmlChar *name; + + name = xmlXPathParseName(ctxt); + if (name == NULL) + XP_ERROR(XPATH_EXPR_ERROR); + if (CUR == '(') { + xmlXPtrEvalFullXPtr(ctxt, name); + /* Short evaluation */ + return; + } else { + /* this handle both Bare Names and Child Sequences */ + xmlXPtrEvalChildSeq(ctxt, name); + } + } + SKIP_BLANKS; + if (CUR != 0) + XP_ERROR(XPATH_EXPR_ERROR); +} + + +/************************************************************************ + * * + * General routines * + * * + ************************************************************************/ + +void xmlXPtrStringRangeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrStartPointFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrEndPointFunction(xmlXPathParserContextPtr ctxt, int nargs); +void 
xmlXPtrHereFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrOriginFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrRangeInsideFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrRangeFunction(xmlXPathParserContextPtr ctxt, int nargs); + +/** + * xmlXPtrNewContext: + * @doc: the XML document + * @here: the node that directly contains the XPointer being evaluated or NULL + * @origin: the element from which a user or program initiated traversal of + * the link, or NULL. + * + * Create a new XPointer context + * + * Returns the xmlXPathContext just allocated. + */ +xmlXPathContextPtr +xmlXPtrNewContext(xmlDocPtr doc, xmlNodePtr here, xmlNodePtr origin) { + xmlXPathContextPtr ret; + + ret = xmlXPathNewContext(doc); + if (ret == NULL) + return(ret); + ret->xptr = 1; + ret->here = here; + ret->origin = origin; + + xmlXPathRegisterFunc(ret, (xmlChar *)"range-to", + xmlXPtrRangeToFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"range", + xmlXPtrRangeFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"range-inside", + xmlXPtrRangeInsideFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"string-range", + xmlXPtrStringRangeFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"start-point", + xmlXPtrStartPointFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"end-point", + xmlXPtrEndPointFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"here", + xmlXPtrHereFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)" origin", + xmlXPtrOriginFunction); + + return(ret); +} + +/** + * xmlXPtrEval: + * @str: the XPointer expression + * @ctx: the XPointer context + * + * Evaluate the XPath Location Path in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL. + * the caller has to free the object. 
+ */ +xmlXPathObjectPtr +xmlXPtrEval(const xmlChar *str, xmlXPathContextPtr ctx) { + xmlXPathParserContextPtr ctxt; + xmlXPathObjectPtr res = NULL, tmp; + xmlXPathObjectPtr init = NULL; + int stack = 0; + + xmlXPathInit(); + + if ((ctx == NULL) || (str == NULL)) + return(NULL); + + ctxt = xmlXPathNewParserContext(str, ctx); + ctxt->xptr = 1; + xmlXPtrEvalXPointer(ctxt); + + if ((ctxt->value != NULL) && + (ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_LOCATIONSET)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrEval: evaluation failed to return a node set\n"); + } else { + res = valuePop(ctxt); + } + + do { + tmp = valuePop(ctxt); + if (tmp != NULL) { + if (tmp != init) { + if (tmp->type == XPATH_NODESET) { + /* + * Evaluation may push a root nodeset which is unused + */ + xmlNodeSetPtr set; + set = tmp->nodesetval; + if ((set->nodeNr != 1) || + (set->nodeTab[0] != (xmlNodePtr) ctx->doc)) + stack++; + } else + stack++; + } + xmlXPathFreeObject(tmp); + } + } while (tmp != NULL); + if (stack != 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrEval: %d object left on the stack\n", + stack); + } + if (ctxt->error != XPATH_EXPRESSION_OK) { + xmlXPathFreeObject(res); + res = NULL; + } + + xmlXPathFreeParserContext(ctxt); + return(res); +} + +/** + * xmlXPtrBuildRangeNodeList: + * @range: a range object + * + * Build a node list tree copy of the range + * + * Returns an xmlNodePtr list or NULL. + * the caller has to free the node tree. 
+ */ +static xmlNodePtr +xmlXPtrBuildRangeNodeList(xmlXPathObjectPtr range) { + /* pointers to generated nodes */ + xmlNodePtr list = NULL, last = NULL, parent = NULL, tmp; + /* pointers to traversal nodes */ + xmlNodePtr start, cur, end; + int index1, index2; + + if (range == NULL) + return(NULL); + if (range->type != XPATH_RANGE) + return(NULL); + start = (xmlNodePtr) range->user; + + if (start == NULL) + return(NULL); + end = range->user2; + if (end == NULL) + return(xmlCopyNode(start, 1)); + + cur = start; + index1 = range->index; + index2 = range->index2; + while (cur != NULL) { + if (cur == end) { + if (cur->type == XML_TEXT_NODE) { + const xmlChar *content = cur->content; + int len; + + if (content == NULL) { + tmp = xmlNewTextLen(NULL, 0); + } else { + len = index2; + if ((cur == start) && (index1 > 1)) { + content += (index1 - 1); + len -= (index1 - 1); + index1 = 0; + } else { + len = index2; + } + tmp = xmlNewTextLen(content, len); + } + /* single sub text node selection */ + if (list == NULL) + return(tmp); + /* prune and return full set */ + if (last != NULL) + xmlAddNextSibling(last, tmp); + else + xmlAddChild(parent, tmp); + return(list); + } else { + tmp = xmlCopyNode(cur, 0); + if (list == NULL) + list = tmp; + else { + if (last != NULL) + xmlAddNextSibling(last, tmp); + else + xmlAddChild(parent, tmp); + } + last = NULL; + parent = tmp; + + if (index2 > 1) { + end = xmlXPtrGetNthChild(cur, index2 - 1); + index2 = 0; + } + if ((cur == start) && (index1 > 1)) { + cur = xmlXPtrGetNthChild(cur, index1 - 1); + index1 = 0; + } else { + cur = cur->children; + } + /* + * Now gather the remaining nodes from cur to end + */ + continue; /* while */ + } + } else if ((cur == start) && + (list == NULL) /* looks superfluous but ... 
*/ ) { + if ((cur->type == XML_TEXT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE)) { + const xmlChar *content = cur->content; + + if (content == NULL) { + tmp = xmlNewTextLen(NULL, 0); + } else { + if (index1 > 1) { + content += (index1 - 1); + } + tmp = xmlNewText(content); + } + last = list = tmp; + } else { + if ((cur == start) && (index1 > 1)) { + tmp = xmlCopyNode(cur, 0); + list = tmp; + parent = tmp; + last = NULL; + cur = xmlXPtrGetNthChild(cur, index1 - 1); + index1 = 0; + /* + * Now gather the remaining nodes from cur to end + */ + continue; /* while */ + } + tmp = xmlCopyNode(cur, 1); + list = tmp; + parent = NULL; + last = tmp; + } + } else { + tmp = NULL; + switch (cur->type) { + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_NODE: + /* Do not copy DTD informations */ + break; + case XML_ENTITY_DECL: + TODO /* handle crossing entities -> stack needed */ + break; + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + /* don't consider it part of the tree content */ + break; + case XML_ATTRIBUTE_NODE: + /* Humm, should not happen ! */ + STRANGE + break; + default: + tmp = xmlCopyNode(cur, 1); + break; + } + if (tmp != NULL) { + if ((list == NULL) || ((last == NULL) && (parent == NULL))) { + STRANGE + return(NULL); + } + if (last != NULL) + xmlAddNextSibling(last, tmp); + else { + xmlAddChild(parent, tmp); + last = tmp; + } + } + } + /* + * Skip to next node in document order + */ + if ((list == NULL) || ((last == NULL) && (parent == NULL))) { + STRANGE + return(NULL); + } + cur = xmlXPtrAdvanceNode(cur); + } + return(list); +} + +/** + * xmlXPtrBuildNodeList: + * @obj: the XPointer result from the evaluation. + * + * Build a node list tree copy of the XPointer result. + * This will drop Attributes and Namespace declarations. + * + * Returns an xmlNodePtr list or NULL. + * the caller has to free the node tree. 
+ */ +xmlNodePtr +xmlXPtrBuildNodeList(xmlXPathObjectPtr obj) { + xmlNodePtr list = NULL, last = NULL; + int i; + + if (obj == NULL) + return(NULL); + switch (obj->type) { + case XPATH_NODESET: { + xmlNodeSetPtr set = obj->nodesetval; + if (set == NULL) + return(NULL); + for (i = 0;i < set->nodeNr;i++) { + if (set->nodeTab[i] == NULL) + continue; + switch (set->nodeTab[i]->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + case XML_ATTRIBUTE_NODE: + case XML_NAMESPACE_DECL: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + continue; /* for */ + } + if (last == NULL) + list = last = xmlCopyNode(set->nodeTab[i], 1); + else { + xmlAddNextSibling(last, xmlCopyNode(set->nodeTab[i], 1)); + if (last->next != NULL) + last = last->next; + } + } + break; + } + case XPATH_LOCATIONSET: { + xmlLocationSetPtr set = (xmlLocationSetPtr) obj->user; + if (set == NULL) + return(NULL); + for (i = 0;i < set->locNr;i++) { + if (last == NULL) + list = last = xmlXPtrBuildNodeList(set->locTab[i]); + else + xmlAddNextSibling(last, + xmlXPtrBuildNodeList(set->locTab[i])); + if (last != NULL) { + while (last->next != NULL) + last = last->next; + } + } + break; + } + case XPATH_RANGE: + return(xmlXPtrBuildRangeNodeList(obj)); + case XPATH_POINT: + return(xmlCopyNode(obj->user, 0)); + default: + break; + } + return(list); +} + +/************************************************************************ + * * + * XPointer functions * + * * + ************************************************************************/ + +/** + * xmlXPtrNbLocChildren: + * 
@node: an xmlNodePtr + * + * Count the number of location children of @node or the length of the + * string value in case of text/PI/Comments nodes + * + * Returns the number of location children + */ +static int +xmlXPtrNbLocChildren(xmlNodePtr node) { + int ret = 0; + if (node == NULL) + return(-1); + switch (node->type) { + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_ELEMENT_NODE: + node = node->children; + while (node != NULL) { + if (node->type == XML_ELEMENT_NODE) + ret++; + node = node->next; + } + break; + case XML_ATTRIBUTE_NODE: + return(-1); + + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + ret = xmlStrlen(node->content); + break; + default: + return(-1); + } + return(ret); +} + +/** + * xmlXPtrHereFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Function implementing here() operation + * as described in 5.4.3 + */ +void +xmlXPtrHereFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + + if (ctxt->context->here == NULL) + XP_ERROR(XPTR_SYNTAX_ERROR); + + valuePush(ctxt, xmlXPtrNewLocationSetNodes(ctxt->context->here, NULL)); +} + +/** + * xmlXPtrOriginFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Function implementing origin() operation + * as described in 5.4.3 + */ +void +xmlXPtrOriginFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + + if (ctxt->context->origin == NULL) + XP_ERROR(XPTR_SYNTAX_ERROR); + + valuePush(ctxt, xmlXPtrNewLocationSetNodes(ctxt->context->origin, NULL)); +} + +/** + * xmlXPtrStartPointFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Function implementing start-point() operation + * as described in 5.4.3 + * ---------------- + * location-set start-point(location-set) + * + * For each location x in the argument location-set, start-point adds a + * location of type point to 
the result location-set. That point represents + * the start point of location x and is determined by the following rules: + * + * - If x is of type point, the start point is x. + * - If x is of type range, the start point is the start point of x. + * - If x is of type root, element, text, comment, or processing instruction, + * - the container node of the start point is x and the index is 0. + * - If x is of type attribute or namespace, the function must signal a + * syntax error. + * ---------------- + * + */ +void +xmlXPtrStartPointFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr tmp, obj, point; + xmlLocationSetPtr newset = NULL; + xmlLocationSetPtr oldset = NULL; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + obj = valuePop(ctxt); + if (obj->type == XPATH_NODESET) { + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(obj->nodesetval); + xmlXPathFreeObject(obj); + obj = tmp; + } + + newset = xmlXPtrLocationSetCreate(NULL); + if (newset == NULL) { + xmlXPathFreeObject(obj); + XP_ERROR(XPATH_MEMORY_ERROR); + } + oldset = (xmlLocationSetPtr) obj->user; + if (oldset != NULL) { + int i; + + for (i = 0; i < oldset->locNr; i++) { + tmp = oldset->locTab[i]; + if (tmp == NULL) + continue; + point = NULL; + switch (tmp->type) { + case XPATH_POINT: + point = xmlXPtrNewPoint(tmp->user, tmp->index); + break; + case XPATH_RANGE: { + xmlNodePtr node = tmp->user; + if (node != NULL) { + if (node->type == XML_ATTRIBUTE_NODE) { + /* TODO: Namespace Nodes ??? */ + xmlXPathFreeObject(obj); + xmlXPtrFreeLocationSet(newset); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + point = xmlXPtrNewPoint(node, tmp->index); + } + break; + } + default: + /*** Should we raise an error ? 
+ xmlXPathFreeObject(obj); + xmlXPathFreeObject(newset); + XP_ERROR(XPATH_INVALID_TYPE) + ***/ + break; + } + if (point != NULL) + xmlXPtrLocationSetAdd(newset, point); + } + } + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); +} + +/** + * xmlXPtrEndPointFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Function implementing end-point() operation + * as described in 5.4.3 + * ---------------------------- + * location-set end-point(location-set) + * + * For each location x in the argument location-set, end-point adds a + * location of type point to the result location-set. That point represents + * the end point of location x and is determined by the following rules: + * + * - If x is of type point, the resulting point is x. + * - If x is of type range, the resulting point is the end point of x. + * - If x is of type root or element, the container node of the resulting + * point is x and the index is the number of location children of x. + * - If x is of type text, comment, or processing instruction, the container + * node of the resulting point is x and the index is the length of the + * string-value of x. + * - If x is of type attribute or namespace, the function must signal a + * syntax error. 
+ * ---------------------------- + */ +void +xmlXPtrEndPointFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr tmp, obj, point; + xmlLocationSetPtr newset = NULL; + xmlLocationSetPtr oldset = NULL; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + obj = valuePop(ctxt); + if (obj->type == XPATH_NODESET) { + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(obj->nodesetval); + xmlXPathFreeObject(obj); + obj = tmp; + } + + newset = xmlXPtrLocationSetCreate(NULL); + oldset = (xmlLocationSetPtr) obj->user; + if (oldset != NULL) { + int i; + + for (i = 0; i < oldset->locNr; i++) { + tmp = oldset->locTab[i]; + if (tmp == NULL) + continue; + point = NULL; + switch (tmp->type) { + case XPATH_POINT: + point = xmlXPtrNewPoint(tmp->user, tmp->index); + break; + case XPATH_RANGE: { + xmlNodePtr node = tmp->user2; + if (node != NULL) { + if (node->type == XML_ATTRIBUTE_NODE) { + /* TODO: Namespace Nodes ??? */ + xmlXPathFreeObject(obj); + xmlXPtrFreeLocationSet(newset); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + point = xmlXPtrNewPoint(node, tmp->index2); + } else if (tmp->user == NULL) { + point = xmlXPtrNewPoint(node, + xmlXPtrNbLocChildren(node)); + } + break; + } + default: + /*** Should we raise an error ? + xmlXPathFreeObject(obj); + xmlXPathFreeObject(newset); + XP_ERROR(XPATH_INVALID_TYPE) + ***/ + break; + } + if (point != NULL) + xmlXPtrLocationSetAdd(newset, point); + } + } + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); +} + + +/** + * xmlXPtrCoveringRange: + * @ctxt: the XPointer Parser context + * @loc: the location for which the covering range must be computed + * + * A covering range is a range that wholly encompasses a location + * Section 5.3.3. 
Covering Ranges for All Location Types + * http://www.w3.org/TR/xptr#N2267 + * + * Returns a new location or NULL in case of error + */ +static xmlXPathObjectPtr +xmlXPtrCoveringRange(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr loc) { + if (loc == NULL) + return(NULL); + if ((ctxt == NULL) || (ctxt->context == NULL) || + (ctxt->context->doc == NULL)) + return(NULL); + switch (loc->type) { + case XPATH_POINT: + return(xmlXPtrNewRange(loc->user, loc->index, + loc->user, loc->index)); + case XPATH_RANGE: + if (loc->user2 != NULL) { + return(xmlXPtrNewRange(loc->user, loc->index, + loc->user2, loc->index2)); + } else { + xmlNodePtr node = (xmlNodePtr) loc->user; + if (node == (xmlNodePtr) ctxt->context->doc) { + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + } else { + switch (node->type) { + case XML_ATTRIBUTE_NODE: + /* !!! our model is slightly different than XPath */ + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: { + int indx = xmlXPtrGetIndex(node); + + node = node->parent; + return(xmlXPtrNewRange(node, indx - 1, + node, indx + 1)); + } + default: + return(NULL); + } + } + } + default: + TODO /* missed one case ??? */ + } + return(NULL); +} + +/** + * xmlXPtrRangeFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Function implementing the range() function 5.4.3 + * location-set range(location-set ) + * + * The range function returns ranges covering the locations in + * the argument location-set. For each location x in the argument + * location-set, a range location representing the covering range of + * x is added to the result location-set. 
+ */ +void +xmlXPtrRangeFunction(xmlXPathParserContextPtr ctxt, int nargs) { + int i; + xmlXPathObjectPtr set; + xmlLocationSetPtr oldset; + xmlLocationSetPtr newset; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + set = valuePop(ctxt); + if (set->type == XPATH_NODESET) { + xmlXPathObjectPtr tmp; + + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); + xmlXPathFreeObject(set); + set = tmp; + } + oldset = (xmlLocationSetPtr) set->user; + + /* + * The loop is to compute the covering range for each item and add it + */ + newset = xmlXPtrLocationSetCreate(NULL); + for (i = 0;i < oldset->locNr;i++) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrCoveringRange(ctxt, oldset->locTab[i])); + } + + /* + * Save the new value and cleanup + */ + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + xmlXPathFreeObject(set); +} + +/** + * xmlXPtrInsideRange: + * @ctxt: the XPointer Parser context + * @loc: the location for which the inside range must be computed + * + * A inside range is a range described in the range-inside() description + * + * Returns a new location or NULL in case of error + */ +static xmlXPathObjectPtr +xmlXPtrInsideRange(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr loc) { + if (loc == NULL) + return(NULL); + if ((ctxt == NULL) || (ctxt->context == NULL) || + (ctxt->context->doc == NULL)) + return(NULL); + switch (loc->type) { + case XPATH_POINT: { + xmlNodePtr node = (xmlNodePtr) loc->user; + switch (node->type) { + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: { + if (node->content == NULL) { + return(xmlXPtrNewRange(node, 0, node, 0)); + } else { + return(xmlXPtrNewRange(node, 0, node, + xmlStrlen(node->content))); + } + } + case XML_ATTRIBUTE_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_NODE: + case 
XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: { + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + } + default: + break; + } + return(NULL); + } + case XPATH_RANGE: { + xmlNodePtr node = (xmlNodePtr) loc->user; + if (loc->user2 != NULL) { + return(xmlXPtrNewRange(node, loc->index, + loc->user2, loc->index2)); + } else { + switch (node->type) { + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: { + if (node->content == NULL) { + return(xmlXPtrNewRange(node, 0, node, 0)); + } else { + return(xmlXPtrNewRange(node, 0, node, + xmlStrlen(node->content))); + } + } + case XML_ATTRIBUTE_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: { + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + } + default: + break; + } + return(NULL); + } + } + default: + TODO /* missed one case ??? */ + } + return(NULL); +} + +/** + * xmlXPtrRangeInsideFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Function implementing the range-inside() function 5.4.3 + * location-set range-inside(location-set ) + * + * The range-inside function returns ranges covering the contents of + * the locations in the argument location-set. For each location x in + * the argument location-set, a range location is added to the result + * location-set. If x is a range location, then x is added to the + * result location-set. If x is not a range location, then x is used + * as the container location of the start and end points of the range + * location to be added; the index of the start point of the range is + * zero; if the end point is a character point then its index is the + * length of the string-value of x, and otherwise is the number of + * location children of x. 
+ * + */ +void +xmlXPtrRangeInsideFunction(xmlXPathParserContextPtr ctxt, int nargs) { + int i; + xmlXPathObjectPtr set; + xmlLocationSetPtr oldset; + xmlLocationSetPtr newset; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + set = valuePop(ctxt); + if (set->type == XPATH_NODESET) { + xmlXPathObjectPtr tmp; + + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); + xmlXPathFreeObject(set); + set = tmp; + } + oldset = (xmlLocationSetPtr) set->user; + + /* + * The loop is to compute the covering range for each item and add it + */ + newset = xmlXPtrLocationSetCreate(NULL); + for (i = 0;i < oldset->locNr;i++) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrInsideRange(ctxt, oldset->locTab[i])); + } + + /* + * Save the new value and cleanup + */ + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + xmlXPathFreeObject(set); +} + +/** + * xmlXPtrRangeToFunction: + * @ctxt: the XPointer Parser context + * @nargs: the number of args + * + * Implement the range-to() XPointer function + */ +void +xmlXPtrRangeToFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr range; + const xmlChar *cur; + xmlXPathObjectPtr res, obj; + xmlXPathObjectPtr tmp; + xmlLocationSetPtr newset = NULL; + xmlNodeSetPtr oldset; + int i; + + CHECK_ARITY(1); + /* + * Save the expression pointer since we will have to evaluate + * it multiple times. Initialize the new set. + */ + CHECK_TYPE(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + ctxt->context->node = NULL; + + cur = ctxt->cur; + newset = xmlXPtrLocationSetCreate(NULL); + + for (i = 0; i < oldset->nodeNr; i++) { + ctxt->cur = cur; + + /* + * Run the evaluation with a node list made of a single item + * in the nodeset. 
+ */ + ctxt->context->node = oldset->nodeTab[i]; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + + xmlXPathEvalExpr(ctxt); + CHECK_ERROR; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + range = xmlXPtrNewRangeNodeObject(oldset->nodeTab[i], res); + if (range != NULL) { + xmlXPtrLocationSetAdd(newset, range); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation set. + */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); +} + +/** + * xmlXPtrAdvanceNode: + * @cur: the node + * + * Advance to the next element or text node in document order + * TODO: add a stack for entering/exiting entities + * + * Returns -1 in case of failure, 0 otherwise + */ +xmlNodePtr +xmlXPtrAdvanceNode(xmlNodePtr cur) { +next: + if (cur == NULL) + return(NULL); + if (cur->children != NULL) { + cur = cur->children ; + goto found; + } + if (cur->next != NULL) { + cur = cur->next; + goto found; + } + do { + cur = cur->parent; + if (cur == NULL) return(NULL); + if (cur->next != NULL) { + cur = cur->next; + goto found; + } + } while (cur != NULL); + +found: + if ((cur->type != XML_ELEMENT_NODE) && + (cur->type != XML_TEXT_NODE) && + (cur->type != XML_DOCUMENT_NODE) && + (cur->type != XML_HTML_DOCUMENT_NODE) && + (cur->type != XML_CDATA_SECTION_NODE)) + goto next; + if (cur->type == XML_ENTITY_REF_NODE) { + TODO + } + return(cur); +} + +/** + * xmlXPtrAdvanceChar: + * @node: the node + * @indx: the indx + * @bytes: the number of bytes + * + * Advance a point of the associated number of bytes (not UTF8 chars) + * + * Returns -1 in case of failure, 0 otherwise + */ 
+static int +xmlXPtrAdvanceChar(xmlNodePtr *node, int *indx, int bytes) { + xmlNodePtr cur; + int pos; + int len; + + if ((node == NULL) || (indx == NULL)) + return(-1); + cur = *node; + if (cur == NULL) + return(-1); + pos = *indx; + + while (bytes >= 0) { + /* + * First position to the beginning of the first text node + * corresponding to this point + */ + while ((cur != NULL) && + ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE))) { + if (pos > 0) { + cur = xmlXPtrGetNthChild(cur, pos); + pos = 0; + } else { + cur = xmlXPtrAdvanceNode(cur); + pos = 0; + } + } + + if (cur == NULL) { + *node = NULL; + *indx = 0; + return(-1); + } + + /* + * if there is no move needed return the current value. + */ + if (pos == 0) pos = 1; + if (bytes == 0) { + *node = cur; + *indx = pos; + return(0); + } + /* + * We should have a text (or cdata) node ... + */ + len = 0; + if ((cur->type != XML_ELEMENT_NODE) && + (cur->content != NULL)) { + len = xmlStrlen(cur->content); + } + if (pos > len) { + /* Strange, the indx in the text node is greater than it's len */ + STRANGE + pos = len; + } + if (pos + bytes >= len) { + bytes -= (len - pos); + cur = xmlXPtrAdvanceNode(cur); + cur = 0; + } else if (pos + bytes < len) { + pos += bytes; + *node = cur; + *indx = pos; + return(0); + } + } + return(-1); +} + +/** + * xmlXPtrMatchString: + * @string: the string to search + * @start: the start textnode + * @startindex: the start index + * @end: the end textnode IN/OUT + * @endindex: the end index IN/OUT + * + * Check whether the document contains @string at the position + * (@start, @startindex) and limited by the (@end, @endindex) point + * + * Returns -1 in case of failure, 0 if not found, 1 if found in which case + * (@start, @startindex) will indicate the position of the beginning + * of the range and (@end, @endindex) will indicate the end + * of the range + */ +static int +xmlXPtrMatchString(const xmlChar *string, 
xmlNodePtr start, int startindex,
+                   xmlNodePtr *end, int *endindex) {
+    xmlNodePtr cur;
+    int pos; /* 0 based */
+    int len; /* in bytes */
+    int stringlen; /* in bytes */
+    int match;
+
+    if (string == NULL)
+        return(-1);
+    if (start == NULL)
+        return(-1);
+    if ((end == NULL) || (endindex == NULL))
+        return(-1);
+    cur = start;
+    if (cur == NULL)
+        return(-1);
+    /*
+     * NOTE(review): @startindex is converted to a 0-based offset here, so a
+     * caller passing 0 would index one byte before the content - confirm
+     * that callers always pass an index >= 1.
+     */
+    pos = startindex - 1;
+    stringlen = xmlStrlen(string);
+
+    /*
+     * Compare node by node; when a node holds only the beginning of the
+     * string, the matched prefix is dropped and the comparison continues
+     * at offset 0 of the following node.
+     */
+    while (stringlen > 0) {
+        /* the match would extend past the limiting point */
+        if ((cur == *end) && (pos + stringlen > *endindex))
+            return(0);
+
+        if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
+            len = xmlStrlen(cur->content);
+            if (len >= pos + stringlen) {
+                /* the rest of the string fits in this node */
+                match = (!xmlStrncmp(&cur->content[pos], string, stringlen));
+                if (match) {
+#ifdef DEBUG_RANGES
+                    xmlGenericError(xmlGenericErrorContext,
+                            "found range %d bytes at index %d of ->",
+                            stringlen, pos + 1);
+                    xmlDebugDumpString(stdout, cur->content);
+                    xmlGenericError(xmlGenericErrorContext, "\n");
+#endif
+                    *end = cur;
+                    *endindex = pos + stringlen;
+                    return(1);
+                } else {
+                    return(0);
+                }
+            } else {
+                /* only the first "sub" bytes can be compared here */
+                int sub = len - pos;
+                match = (!xmlStrncmp(&cur->content[pos], string, sub));
+                if (match) {
+#ifdef DEBUG_RANGES
+                    xmlGenericError(xmlGenericErrorContext,
+                            "found subrange %d bytes at index %d of ->",
+                            sub, pos + 1);
+                    xmlDebugDumpString(stdout, cur->content);
+                    xmlGenericError(xmlGenericErrorContext, "\n");
+#endif
+                    string = &string[sub];
+                    stringlen -= sub;
+                } else {
+                    return(0);
+                }
+            }
+        }
+        cur = xmlXPtrAdvanceNode(cur);
+        if (cur == NULL)
+            return(0);
+        pos = 0;
+    }
+    /* reached only when @string is empty or was consumed exactly at a node end */
+    return(1);
+}
+
+/**
+ * xmlXPtrSearchString:
+ * @string:  the string to search
+ * @start:  the start textnode IN/OUT
+ * @startindex:  the start index IN/OUT
+ * @end:  the end textnode
+ * @endindex:  the end index
+ *
+ * Search the next occurrence of @string within the document content
+ * until the (@end, @endindex) point is reached
+ *
+ * Returns -1 in case of failure, 0 if not found, 1 if found in which case
+ *            (@start, @startindex) will indicate the position of the beginning
+ *            of the range and (@end, @endindex) will indicate the end
+ *            of the range
+ */
+static int
+xmlXPtrSearchString(const xmlChar *string, xmlNodePtr *start, int *startindex,
+                    xmlNodePtr *end, int *endindex) {
+    xmlNodePtr cur;
+    const xmlChar *str;
+    int pos; /* 0 based */
+    int len; /* in bytes */
+    xmlChar first;
+
+    if (string == NULL)
+        return(-1);
+    if ((start == NULL) || (startindex == NULL))
+        return(-1);
+    if ((end == NULL) || (endindex == NULL))
+        return(-1);
+    cur = *start;
+    if (cur == NULL)
+        return(-1);
+    pos = *startindex - 1;
+    first = string[0];
+
+    /*
+     * Scan each content-bearing node for the first byte of @string and let
+     * xmlXPtrMatchString() confirm (and bound) every candidate.
+     */
+    while (cur != NULL) {
+        if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
+            len = xmlStrlen(cur->content);
+            while (pos <= len) {
+                if (first != 0) {
+                    str = xmlStrchr(&cur->content[pos], first);
+                    if (str != NULL) {
+                        pos = (str - (xmlChar *)(cur->content));
+#ifdef DEBUG_RANGES
+                        xmlGenericError(xmlGenericErrorContext,
+                                "found '%c' at index %d of ->",
+                                first, pos + 1);
+                        xmlDebugDumpString(stdout, cur->content);
+                        xmlGenericError(xmlGenericErrorContext, "\n");
+#endif
+                        if (xmlXPtrMatchString(string, cur, pos + 1,
+                                               end, endindex)) {
+                            *start = cur;
+                            *startindex = pos + 1;
+                            return(1);
+                        }
+                        /* false candidate: resume right after it */
+                        pos++;
+                    } else {
+                        /* no further candidate in this node */
+                        pos = len + 1;
+                    }
+                } else {
+                    /*
+                     * An empty string is considered to match before each
+                     * character of the string-value and after the final
+                     * character.
+                     */
+#ifdef DEBUG_RANGES
+                    xmlGenericError(xmlGenericErrorContext,
+                            "found '' at index %d of ->",
+                            pos + 1);
+                    xmlDebugDumpString(stdout, cur->content);
+                    xmlGenericError(xmlGenericErrorContext, "\n");
+#endif
+                    *start = cur;
+                    *startindex = pos + 1;
+                    *end = cur;
+                    *endindex = pos + 1;
+                    return(1);
+                }
+            }
+        }
+        if ((cur == *end) && (pos >= *endindex))
+            return(0);
+        cur = xmlXPtrAdvanceNode(cur);
+        if (cur == NULL)
+            return(0);
+        /*
+         * NOTE(review): pos is documented above as 0 based, yet the scan of
+         * every following node restarts at offset 1 - confirm that skipping
+         * the first byte of those nodes is intended.
+         */
+        pos = 1;
+    }
+    return(0);
+}
+
+/**
+ * xmlXPtrGetLastChar:
+ * @node:  the node
+ * @index:  the index
+ *
+ * Computes the point coordinates of the last char of this point
+ *
+ * Returns -1 in case of failure, 0 otherwise
+ */
+static int
+xmlXPtrGetLastChar(xmlNodePtr *node, int *indx) {
+    xmlNodePtr cur;
+    int pos, len = 0;
+
+    if ((node == NULL) || (indx == NULL))
+        return(-1);
+    cur = *node;
+    pos = *indx;
+
+    if (cur == NULL)
+        return(-1);
+
+    /* in a container node a positive index selects one of its children */
+    if ((cur->type == XML_ELEMENT_NODE) ||
+        (cur->type == XML_DOCUMENT_NODE) ||
+        (cur->type == XML_HTML_DOCUMENT_NODE)) {
+        if (pos > 0) {
+            cur = xmlXPtrGetNthChild(cur, pos);
+            pos = 0;
+        }
+    }
+    /*
+     * Follow the ->last links down to a leaf; the leaf must carry content,
+     * whose byte length becomes the resulting index.
+     */
+    while (cur != NULL) {
+        if (cur->last != NULL)
+            cur = cur->last;
+        else if ((cur->type != XML_ELEMENT_NODE) &&
+                 (cur->content != NULL)) {
+            len = xmlStrlen(cur->content);
+            break;
+        } else {
+            return(-1);
+        }
+    }
+    if (cur == NULL)
+        return(-1);
+    *node = cur;
+    *indx = len;
+    return(0);
+}
+
+/**
+ * xmlXPtrGetStartPoint:
+ * @obj:  an range
+ * @node:  the resulting node
+ * @indx:  the resulting index
+ *
+ * read the object and return the start point coordinates.
+ *
+ * Returns -1 in case of failure, 0 otherwise
+ */
+static int
+xmlXPtrGetStartPoint(xmlXPathObjectPtr obj, xmlNodePtr *node, int *indx) {
+    if ((obj == NULL) || (node == NULL) || (indx == NULL))
+        return(-1);
+
+    /* only point and range objects carry coordinates */
+    if ((obj->type != XPATH_POINT) && (obj->type != XPATH_RANGE))
+        return(-1);
+
+    /* a non-positive stored index is reported as 0 */
+    *node = obj->user;
+    *indx = (obj->index > 0) ? obj->index : 0;
+    return(0);
+}
+
+/**
+ * xmlXPtrGetEndPoint:
+ * @obj:  an range
+ * @node:  the resulting node
+ * @indx:  the resulting indx
+ *
+ * read the object and return the end point coordinates.
+ * NOTE(review): the same fields are read as in xmlXPtrGetStartPoint(), for
+ * ranges as well as points - confirm that a range is not expected to report
+ * a distinct end here.
+ *
+ * Returns -1 in case of failure, 0 otherwise
+ */
+static int
+xmlXPtrGetEndPoint(xmlXPathObjectPtr obj, xmlNodePtr *node, int *indx) {
+    if ((obj == NULL) || (node == NULL) || (indx == NULL))
+        return(-1);
+
+    /* only point and range objects carry coordinates */
+    if ((obj->type != XPATH_POINT) && (obj->type != XPATH_RANGE))
+        return(-1);
+
+    /* a non-positive stored index is reported as 0 */
+    *node = obj->user;
+    *indx = (obj->index > 0) ? obj->index : 0;
+    return(0);
+}
+
+/**
+ * xmlXPtrStringRangeFunction:
+ * @ctxt:  the XPointer Parser context
+ * @nargs:  the number of args
+ *
+ * Function implementing the string-range() function
+ * range as described in 5.4.2
+ *
+ * ------------------------------
+ * [Definition: For each location in the location-set argument,
+ * string-range returns a set of string ranges, a set of substrings in a
+ * string. Specifically, the string-value of the location is searched for
+ * substrings that match the string argument, and the resulting location-set
+ * will contain a range location for each non-overlapping match.]
+ * An empty string is considered to match before each character of the
+ * string-value and after the final character.
Whitespace in a string + * is matched literally, with no normalization except that provided by + * XML for line ends. The third argument gives the position of the first + * character to be in the resulting range, relative to the start of the + * match. The default value is 1, which makes the range start immediately + * before the first character of the matched string. The fourth argument + * gives the number of characters in the range; the default is that the + * range extends to the end of the matched string. + * + * Element boundaries, as well as entire embedded nodes such as processing + * instructions and comments, are ignored as defined in [XPath]. + * + * If the string in the second argument is not found in the string-value + * of the location, or if a value in the third or fourth argument indicates + * a string that is beyond the beginning or end of the document, the + * expression fails. + * + * The points of the range-locations in the returned location-set will + * all be character points. 
+ * ------------------------------ + */ +void +xmlXPtrStringRangeFunction(xmlXPathParserContextPtr ctxt, int nargs) { + int i, startindex, endindex, fendindex; + xmlNodePtr start, end, fend; + xmlXPathObjectPtr set; + xmlLocationSetPtr oldset; + xmlLocationSetPtr newset; + xmlXPathObjectPtr string; + xmlXPathObjectPtr position = NULL; + xmlXPathObjectPtr number = NULL; + int found, pos = 0, num = 0; + + /* + * Grab the arguments + */ + if ((nargs < 2) || (nargs > 4)) + XP_ERROR(XPATH_INVALID_ARITY); + + if (nargs >= 4) { + CHECK_TYPE(XPATH_NUMBER); + number = valuePop(ctxt); + if (number != NULL) + num = (int) number->floatval; + } + if (nargs >= 3) { + CHECK_TYPE(XPATH_NUMBER); + position = valuePop(ctxt); + if (position != NULL) + pos = (int) position->floatval; + } + CHECK_TYPE(XPATH_STRING); + string = valuePop(ctxt); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + set = valuePop(ctxt); + if (set->type == XPATH_NODESET) { + xmlXPathObjectPtr tmp; + + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); + xmlXPathFreeObject(set); + set = tmp; + } + oldset = (xmlLocationSetPtr) set->user; + + /* + * The loop is to search for each element in the location set + * the list of location set corresponding to that search + */ + newset = xmlXPtrLocationSetCreate(NULL); + for (i = 0;i < oldset->locNr;i++) { +#ifdef DEBUG_RANGES + xmlXPathDebugDumpObject(stdout, oldset->locTab[i], 0); +#endif + + xmlXPtrGetStartPoint(oldset->locTab[i], &start, &startindex); + xmlXPtrGetEndPoint(oldset->locTab[i], &end, &endindex); + xmlXPtrAdvanceChar(&start, &startindex, 0); + xmlXPtrGetLastChar(&end, &endindex); + +#ifdef DEBUG_RANGES + xmlGenericError(xmlGenericErrorContext, + "from index %d of ->", startindex); + xmlDebugDumpString(stdout, start->content); + xmlGenericError(xmlGenericErrorContext, "\n"); + 
xmlGenericError(xmlGenericErrorContext, + "to index %d of ->", endindex); + xmlDebugDumpString(stdout, end->content); + xmlGenericError(xmlGenericErrorContext, "\n"); +#endif + do { + fend = end; + fendindex = endindex; + found = xmlXPtrSearchString(string->stringval, &start, &startindex, + &fend, &fendindex); + if (found == 1) { + if (position == NULL) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, fend, fendindex)); + } else if (xmlXPtrAdvanceChar(&start, &startindex, + pos - 1) == 0) { + if ((number != NULL) && (num > 0)) { + int rindx; + xmlNodePtr rend; + rend = start; + rindx = startindex - 1; + if (xmlXPtrAdvanceChar(&rend, &rindx, + num) == 0) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, + rend, rindx)); + } + } else if ((number != NULL) && (num <= 0)) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, + start, startindex)); + } else { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, + fend, fendindex)); + } + } + start = fend; + startindex = fendindex; + if (string->stringval[0] == 0) + startindex++; + } + } while (found == 1); + } + + /* + * Save the new value and cleanup + */ + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + xmlXPathFreeObject(set); + xmlXPathFreeObject(string); + if (position) xmlXPathFreeObject(position); + if (number) xmlXPathFreeObject(number); +} + +/** + * xmlXPtrEvalRangePredicate: + * @ctxt: the XPointer Parser context + * + * [8] Predicate ::= '[' PredicateExpr ']' + * [9] PredicateExpr ::= Expr + * + * Evaluate a predicate as in xmlXPathEvalPredicate() but for + * a Location Set instead of a node set + */ +void +xmlXPtrEvalRangePredicate(xmlXPathParserContextPtr ctxt) { + const xmlChar *cur; + xmlXPathObjectPtr res; + xmlXPathObjectPtr obj, tmp; + xmlLocationSetPtr newset = NULL; + xmlLocationSetPtr oldset; + int i; + + SKIP_BLANKS; + if (CUR != '[') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + NEXT; + SKIP_BLANKS; + + /* + * 
Extract the old set, and then evaluate the result of the + * expression for all the element in the set. use it to grow + * up a new set. + */ + CHECK_TYPE(XPATH_LOCATIONSET); + obj = valuePop(ctxt); + oldset = obj->user; + ctxt->context->node = NULL; + + if ((oldset == NULL) || (oldset->locNr == 0)) { + ctxt->context->contextSize = 0; + ctxt->context->proximityPosition = 0; + xmlXPathEvalExpr(ctxt); + res = valuePop(ctxt); + if (res != NULL) + xmlXPathFreeObject(res); + valuePush(ctxt, obj); + CHECK_ERROR; + } else { + /* + * Save the expression pointer since we will have to evaluate + * it multiple times. Initialize the new set. + */ + cur = ctxt->cur; + newset = xmlXPtrLocationSetCreate(NULL); + + for (i = 0; i < oldset->locNr; i++) { + ctxt->cur = cur; + + /* + * Run the evaluation with a node list made of a single item + * in the nodeset. + */ + ctxt->context->node = oldset->locTab[i]->user; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + ctxt->context->contextSize = oldset->locNr; + ctxt->context->proximityPosition = i + 1; + + xmlXPathEvalExpr(ctxt); + CHECK_ERROR; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + if (xmlXPathEvaluatePredicateResult(ctxt, res)) { + xmlXPtrLocationSetAdd(newset, + xmlXPathObjectCopy(oldset->locTab[i])); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation set. 
+ */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + } + if (CUR != ']') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + + NEXT; + SKIP_BLANKS; +} + +#else +#endif + diff --git a/ext/xml/CREDITS b/ext/xml/CREDITS index 9f781a8c9c..b9cbfdd5af 100644 --- a/ext/xml/CREDITS +++ b/ext/xml/CREDITS @@ -1,2 +1,2 @@ XML -Stig Bakken, Thies C. Arntzen +Stig Bakken, Thies C. Arntzen, Sterling Hughes diff --git a/ext/xml/compat.c b/ext/xml/compat.c new file mode 100644 index 0000000000..8176631fa0 --- /dev/null +++ b/ext/xml/compat.c @@ -0,0 +1,369 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2003 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
|
+  +----------------------------------------------------------------------+
+  | Authors: Sterling Hughes <sterling@php.net>                          |
+  +----------------------------------------------------------------------+
+ */
+
+#include "php.h"
+#include "expat_compat.h"
+
+#ifdef LIBXML_EXPAT_COMPAT
+
+/* True when __ns is exactly the string "xmlns" (NULL is tolerated). */
+#define IS_NS_DECL(__ns) \
+	((__ns) != NULL && strlen(__ns) == 5 && *(__ns) == 'x' && *((__ns)+1) == 'm' && \
+	 *((__ns)+2) == 'l' && *((__ns)+3) == 'n' && *((__ns)+4) == 's')
+
+/*
+ * Scan the name/value attribute pairs of <tagname> for the first attribute
+ * whose prefix is "xmlns". When one is found, the start-namespace handler
+ * (if any) is invoked with its local part and value, the local part is
+ * mapped to the value in _ns_map and the tag name is mapped to the local
+ * part in _reverse_ns_map. Only the first such attribute is processed.
+ */
+static void
+_find_namespace_decl(XML_Parser parser, const xmlChar *tagname, const xmlChar **attr)
+{
+	xmlChar **attr_p = (xmlChar **) attr;
+	xmlChar  *name;
+	xmlChar  *value;
+	xmlChar  *partial;
+	xmlChar  *namespace;
+	xmlChar  *reverse;
+	int       is_decl;
+
+	while (attr_p && *attr_p) {
+		name = attr_p[0];
+		namespace = NULL;
+
+		/* both the returned local part and the prefix are ours to release */
+		partial = xmlSplitQName(parser->parser, name, &namespace);
+		is_decl = IS_NS_DECL(namespace);
+
+		if (is_decl) {
+			value = xmlStrdup(attr_p[1]);
+			if (parser->h_start_ns) {
+				parser->h_start_ns(parser->user, partial, (const XML_Char *) value);
+			}
+			/* the tables own the payload only if the insertion succeeded */
+			if (xmlHashAddEntry(parser->_ns_map, partial, value) < 0 && value) {
+				xmlFree(value);
+			}
+			reverse = xmlStrdup(partial);
+			if (xmlHashAddEntry(parser->_reverse_ns_map, tagname, reverse) < 0 && reverse) {
+				xmlFree(reverse);
+			}
+		}
+
+		if (partial) {
+			xmlFree(partial);
+		}
+		if (namespace) {
+			xmlFree(namespace);
+		}
+		if (is_decl) {
+			break;
+		}
+
+		attr_p += 2;
+	}
+}
+
+/*
+ * Build the name reported to the user handlers. When <name> has a prefix
+ * that is known to _ns_map the result is "<mapped value>:<local part>";
+ * otherwise it is a plain copy of <name>. The result is stored in
+ * *qualified, must be released with xmlFree(), and is NULL when memory
+ * could not be allocated.
+ */
+static void
+_qualify_namespace(XML_Parser parser, const xmlChar *name, xmlChar **qualified)
+{
+	xmlChar *partial;
+	xmlChar *namespace = NULL;
+	xmlChar *nsvalue = NULL;
+
+	*qualified = NULL;
+
+	partial = xmlSplitQName(parser->parser, name, &namespace);
+	if (namespace) {
+		nsvalue = xmlHashLookup(parser->_ns_map, namespace);
+	}
+
+	if (nsvalue && partial) {
+		int ns_len = xmlStrlen(nsvalue);
+		int local_len = xmlStrlen(partial);
+
+		/* allocate with the libxml allocator: callers release with xmlFree() */
+		*qualified = xmlMalloc(ns_len + local_len + 2); /* colon + NUL */
+		if (*qualified) {
+			memcpy(*qualified, nsvalue, ns_len);
+			(*qualified)[ns_len] = ':';
+			memcpy(*qualified + ns_len + 1, partial, local_len);
+			(*qualified)[ns_len + 1 + local_len] = '\0';
+		}
+	} else {
+		*qualified = xmlStrdup(name);
+	}
+
+	if (partial) {
+		xmlFree(partial);
+	}
+	if (namespace) {
+		xmlFree(namespace);
+	}
+}
+
+/*
+ * SAX startElement callback: performs the namespace bookkeeping when the
+ * parser was created in namespace mode, then forwards the (possibly
+ * qualified) element name to the user's start-element handler, if set.
+ */
+static void
+_start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes)
+{
+	XML_Parser  parser = (XML_Parser) user;
+	xmlChar    *qualified_name = NULL;
+
+	if (parser->namespace) {
+		_find_namespace_decl(parser, name, attributes);
+		_qualify_namespace(parser, name, &qualified_name);
+	} else {
+		qualified_name = xmlStrdup(name);
+	}
+
+	if (qualified_name == NULL) {
+		return;
+	}
+
+	/* handlers start out NULL and stay so until the user installs one */
+	if (parser->h_start_element) {
+		parser->h_start_element(parser->user, (const XML_Char *) qualified_name, (const XML_Char **) attributes);
+	}
+
+	xmlFree(qualified_name);
+}
+
+/*
+ * SAX endElement callback: in namespace mode reports the end of the
+ * namespace recorded for this tag name (if any), then forwards the
+ * (possibly qualified) element name to the user's end-element handler.
+ */
+static void
+_end_element_handler(void *user, const xmlChar *name)
+{
+	xmlChar    *qualified_name = NULL;
+	XML_Parser  parser = (XML_Parser) user;
+
+	if (parser->namespace) {
+		xmlChar *nsname;
+
+		nsname = xmlHashLookup(parser->_reverse_ns_map, name);
+		if (nsname && parser->h_end_ns) {
+			parser->h_end_ns(parser->user, nsname);
+		}
+
+		_qualify_namespace(parser, name, &qualified_name);
+	} else {
+		qualified_name = xmlStrdup(name);
+	}
+
+	if (qualified_name == NULL) {
+		return;
+	}
+
+	if (parser->h_end_element) {
+		parser->h_end_element(parser->user, (const XML_Char *) qualified_name);
+	}
+
+	xmlFree(qualified_name);
+}
+
+/* SAX characters callback: forwards to the character-data handler, if set. */
+static void
+_cdata_handler(void *user, const xmlChar *cdata, int cdata_len)
+{
+	XML_Parser parser = (XML_Parser) user;
+
+	if (parser->h_cdata) {
+		parser->h_cdata(parser->user, (const XML_Char *) cdata, cdata_len);
+	}
+}
+
+/* SAX processingInstruction callback: forwards to the PI handler, if set. */
+static void
+_pi_handler(void *user, const xmlChar *target, const xmlChar *data)
+{
+	XML_Parser parser = (XML_Parser) user;
+
+	if (parser->h_pi) {
+		parser->h_pi(parser->user, (const XML_Char *) target, (const XML_Char *) data);
+	}
+}
+
+/*
+ * Forwards an unparsed entity declaration to the user's handler, if set;
+ * the handler's third argument is always passed as NULL.
+ */
+static void
+_unparsed_entity_decl_handler(void *user,
+                              const xmlChar *name,
+                              const xmlChar *sys_id,
+                              const xmlChar *pub_id,
+                              const xmlChar *notation)
+{
+	XML_Parser parser = (XML_Parser) user;
+
+	if (parser->h_unparsed_entity_decl) {
+		parser->h_unparsed_entity_decl(parser->user, name, NULL, sys_id, pub_id, notation);
+	}
+}
+
+/*
+ * Forwards a notation declaration to the user's handler, if set; the
+ * handler's third argument is always passed as NULL.
+ */
+static void
+_notation_decl_handler(void *user, const xmlChar *notation, const xmlChar *sys_id, const xmlChar *pub_id)
+{
+	XML_Parser parser = (XML_Parser) user;
+
+	if (parser->h_notation_decl) {
+		parser->h_notation_decl(parser->user, notation, NULL, sys_id, pub_id);
+	}
+}
+
+static void
+_external_entity_ref_handler(void *user, const xmlChar *names, int type, const xmlChar *sys_id, const xmlChar *pub_id, xmlChar
*content)
+{
+	XML_Parser parser = (XML_Parser) user;
+
+	/* handlers start out NULL and stay so until the user installs one */
+	if (parser->h_external_entity_ref) {
+		parser->h_external_entity_ref(parser->user, names, NULL, sys_id, pub_id);
+	}
+}
+
+/* SAX callback table shared by every parser created by this layer. */
+static xmlSAXHandler
+php_xml_compat_handlers = {
+	NULL, /* internalSubset */
+	NULL, /* isStandalone */
+	NULL, /* hasInternalSubset */
+	NULL, /* hasExternalSubset */
+	NULL, /* resolveEntity */
+	NULL, /* getEntity */
+	_external_entity_ref_handler, /* entityDecl */
+	_notation_decl_handler,
+	NULL, /* attributeDecl */
+	NULL, /* elementDecl */
+	_unparsed_entity_decl_handler, /* unparsedEntity */
+	NULL, /* setDocumentLocator */
+	NULL, /* startDocument */
+	NULL, /* endDocument */
+	_start_element_handler,
+	_end_element_handler,
+	NULL, /* reference */
+	_cdata_handler,
+	NULL, /* ignorableWhitespace */
+	_pi_handler,
+	NULL, /* comment */
+	NULL, /* warning */
+	NULL, /* error */
+	NULL  /* fatalError */
+};
+
+/*
+ * Create a parser without namespace processing. The wrapper is zero
+ * initialised, so every user handler starts out NULL. Returns NULL when
+ * either the wrapper or the underlying libxml push context cannot be
+ * created; nothing is leaked in that case.
+ */
+XML_Parser
+XML_ParserCreate(const XML_Char *encoding)
+{
+	XML_Parser parser;
+
+	parser = (XML_Parser) calloc(1, sizeof(struct _XML_Parser));
+	if (parser == NULL) {
+		return NULL;
+	}
+
+	parser->parser = xmlCreatePushParserCtxt((xmlSAXHandlerPtr) &php_xml_compat_handlers, (void *) parser, NULL, 0, NULL);
+	if (parser->parser == NULL) {
+		free(parser);
+		return NULL;
+	}
+	parser->parser->encoding = xmlStrdup(encoding);
+
+	return parser;
+}
+
+/*
+ * Create a parser with namespace processing enabled: same as
+ * XML_ParserCreate() plus the two lookup tables used to qualify element
+ * names. <sep> is accepted for API compatibility and is not used. Returns
+ * NULL, with everything released, if any allocation fails.
+ */
+XML_Parser
+XML_ParserCreateNS(const XML_Char *encoding, const XML_Char sep)
+{
+	XML_Parser parser;
+
+	parser = XML_ParserCreate(encoding);
+	if (parser == NULL) {
+		return NULL;
+	}
+
+	parser->namespace = 1;
+	parser->_ns_map = xmlHashCreate(10);
+	parser->_reverse_ns_map = xmlHashCreate(10);
+	if (parser->_ns_map == NULL || parser->_reverse_ns_map == NULL) {
+		/* the tables are still empty, so no payload deallocator is needed */
+		xmlHashFree(parser->_ns_map, NULL);
+		xmlHashFree(parser->_reverse_ns_map, NULL);
+		xmlFreeParserCtxt(parser->parser);
+		free(parser);
+		return NULL;
+	}
+
+	return parser;
+}
+
+/* Store the opaque pointer handed back as first argument to every handler. */
+void
+XML_SetUserData(XML_Parser parser, void *user)
+{
+	parser->user = user;
+}
+
+/* Return the pointer previously stored with XML_SetUserData(). */
+void *
+XML_GetUserData(XML_Parser parser)
+{
+	return parser->user;
+}
+
+void
+XML_SetElementHandler(XML_Parser parser,
XML_StartElementHandler start, XML_EndElementHandler end)
+{
+	parser->h_start_element = start;
+	parser->h_end_element = end;
+}
+
+/* Install the handler invoked for character data. */
+void
+XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler cdata)
+{
+	parser->h_cdata = cdata;
+}
+
+/* Install the handler invoked for processing instructions. */
+void
+XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler pi)
+{
+	parser->h_pi = pi;
+}
+
+/* Record the default handler. */
+void
+XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler d)
+{
+	parser->h_default = d;
+}
+
+/* Install the handler invoked for unparsed entity declarations. */
+void
+XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler unparsed_decl)
+{
+	parser->h_unparsed_entity_decl = unparsed_decl;
+}
+
+/* Install the handler invoked for notation declarations. */
+void
+XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler notation_decl)
+{
+	parser->h_notation_decl = notation_decl;
+}
+
+/* Install the handler invoked for external entity references. */
+void
+XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler ext_entity)
+{
+	parser->h_external_entity_ref = ext_entity;
+}
+
+/* Install the handler invoked when a namespace declaration starts. */
+void
+XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start_ns)
+{
+	parser->h_start_ns = start_ns;
+}
+
+/* Install the handler invoked when a namespace declaration ends. */
+void
+XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end_ns)
+{
+	parser->h_end_ns = end_ns;
+}
+
+/*
+ * Feed <data_len> bytes to the push parser. The result of xmlParseChunk()
+ * is negated, so a zero result from libxml yields 1 and anything else 0.
+ */
+int
+XML_Parse(XML_Parser parser, const XML_Char *data, int data_len, int is_final)
+{
+	return !xmlParseChunk(parser->parser, data, data_len, is_final);
+}
+
+/* Return the error number recorded in the libxml parser context. */
+int
+XML_GetErrorCode(XML_Parser parser)
+{
+	return parser->parser->errNo;
+}
+
+const XML_Char *error_mapping[] = {
+	"Unknown Error"
+};
+
+/*
+ * Map an error code to a message. The table holds a single generic entry,
+ * while the codes handed in come from XML_GetErrorCode() and are not
+ * limited to the table size, so any code outside the table resolves to
+ * that generic entry instead of reading past the end of the array.
+ */
+const XML_Char *
+XML_ErrorString(int code)
+{
+	if (code < 0 || code >= (int) (sizeof(error_mapping) / sizeof(error_mapping[0]))) {
+		return error_mapping[0];
+	}
+	return error_mapping[code];
+}
+
+/* Line of the current input, as tracked by libxml. */
+int
+XML_GetCurrentLineNumber(XML_Parser parser)
+{
+	return parser->parser->input->line;
+}
+
+/* Column of the current input, as tracked by libxml. */
+int
+XML_GetCurrentColumnNumber(XML_Parser parser)
+{
+	return parser->parser->input->col;
+}
+
+/* Value of the current input's "consumed" counter. */
+int
+XML_GetCurrentByteIndex(XML_Parser parser)
+{
+	return parser->parser->input->consumed;
+}
+
+/* Fixed version string reported by this compatibility layer. */
+const XML_Char *XML_ExpatVersion(void)
+{
+	return "1.0";
+}
+
+static void
+_free_ns_name(void *ptr,
xmlChar *name) +{ + xmlFree(ptr); +} + +void +XML_ParserFree(XML_Parser parser) +{ + if (parser->namespace) { + xmlHashFree(parser->_ns_map, _free_ns_name); + xmlHashFree(parser->_reverse_ns_map, _free_ns_name); + } + xmlFreeParserCtxt(parser->parser); + free(parser); +} + +#endif /* LIBXML_EXPAT_COMPAT */ + +/** + * Local Variables: + * tab-width: 4 + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + * vim600: fdm=marker + * vim: ts=4 noet sw=4 + */ diff --git a/ext/xml/config.m4 b/ext/xml/config.m4 index 40dabd3e86..c4c2de4aa6 100644 --- a/ext/xml/config.m4 +++ b/ext/xml/config.m4 @@ -5,31 +5,12 @@ dnl PHP_ARG_ENABLE(xml,whether to enable XML support, [ --disable-xml Disable XML support using bundled expat lib], yes) -PHP_ARG_WITH(expat-dir, external libexpat install dir, -[ --with-expat-dir=DIR XML: external libexpat install dir], no, no) - if test "$PHP_XML" = "yes"; then - AC_DEFINE(HAVE_LIBEXPAT, 1, [ ]) - - if test "$PHP_EXPAT_DIR" = "no" && test "$PHP_BUNDLE_EXPAT" = "no"; then - AC_MSG_ERROR(xml support is enabled, however the expat bundle is disabled and no external expat directory was specified.) - fi - - if test "$PHP_EXPAT_DIR" != "no"; then - for i in $PHP_XML $PHP_EXPAT_DIR; do - if test -f $i/lib/libexpat.a -o -f $i/lib/libexpat.$SHLIB_SUFFIX_NAME ; then - EXPAT_DIR=$i - fi - done - - if test -z "$EXPAT_DIR"; then - AC_MSG_ERROR(not found. Please reinstall the expat distribution.) - fi + AC_DEFINE(HAVE_XML, 1, [ ]) - PHP_ADD_INCLUDE($EXPAT_DIR/include) - PHP_ADD_LIBRARY_WITH_PATH(expat, $EXPAT_DIR/lib, XML_SHARED_LIBADD) - PHP_SUBST(XML_SHARED_LIBADD) - fi + if test "$PHP_BUNDLE_EXPAT" = "no" && test "$PHP_BUNDLE_LIBXML" = "no"; then + AC_MSG_ERROR(xml support is enabled, however both xml libraries have been disabled.) 
+ fi - PHP_NEW_EXTENSION(xml, xml.c, $ext_shared) + PHP_NEW_EXTENSION(xml, compat.c xml.c, $ext_shared) fi diff --git a/ext/xml/expat_compat.h b/ext/xml/expat_compat.h new file mode 100644 index 0000000000..1ece0bd608 --- /dev/null +++ b/ext/xml/expat_compat.h @@ -0,0 +1,123 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2003 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Sterling Hughes <sterling@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifndef PHP_EXPAT_COMPAT_H +#define PHP_EXPAT_COMPAT_H + +#if !defined(HAVE_LIBEXPAT) && defined(HAVE_LIBXML) +#define LIBXML_EXPAT_COMPAT 1 + +#include <libxml.h> +#include <libxml/hash.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/tree.h> + +typedef xmlChar XML_Char; + +typedef void (*XML_StartElementHandler)(void *, const XML_Char *, const XML_Char **); +typedef void (*XML_EndElementHandler)(void *, const XML_Char *); +typedef void (*XML_CharacterDataHandler)(void *, const XML_Char *, int); +typedef void (*XML_ProcessingInstructionHandler)(void *, const XML_Char *, const XML_Char *); +typedef void (*XML_DefaultHandler)(void *, const XML_Char *, int); +typedef void (*XML_UnparsedEntityDeclHandler)(void *, const XML_Char *, 
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef void (*XML_NotationDeclHandler)(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef int (*XML_ExternalEntityRefHandler)(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef void (*XML_StartNamespaceDeclHandler)(void *, const XML_Char *, const XML_Char *); +typedef void (*XML_EndNamespaceDeclHandler)(void *, const XML_Char *); + +typedef struct _XML_Parser { + int namespace; + + xmlHashTablePtr _ns_map; + xmlHashTablePtr _reverse_ns_map; + + void *user; + xmlParserCtxtPtr parser; + + XML_StartElementHandler h_start_element; + XML_EndElementHandler h_end_element; + XML_CharacterDataHandler h_cdata; + XML_ProcessingInstructionHandler h_pi; + XML_DefaultHandler h_default; + XML_UnparsedEntityDeclHandler h_unparsed_entity_decl; + XML_NotationDeclHandler h_notation_decl; + XML_ExternalEntityRefHandler h_external_entity_ref; + XML_StartNamespaceDeclHandler h_start_ns; + XML_EndNamespaceDeclHandler h_end_ns; +} *XML_Parser; + +XML_Parser XML_ParserCreate(const XML_Char *); +XML_Parser XML_ParserCreateNS(const XML_Char *, const XML_Char); +void XML_SetUserData(XML_Parser, void *); +void *XML_GetUserData(XML_Parser); +void XML_SetElementHandler(XML_Parser, XML_StartElementHandler, XML_EndElementHandler); +void XML_SetCharacterDataHandler(XML_Parser, XML_CharacterDataHandler); +void XML_SetProcessingInstructionHandler(XML_Parser, XML_ProcessingInstructionHandler); +void XML_SetDefaultHandler(XML_Parser, XML_DefaultHandler); +void XML_SetUnparsedEntityDeclHandler(XML_Parser, XML_UnparsedEntityDeclHandler); +void XML_SetNotationDeclHandler(XML_Parser, XML_NotationDeclHandler); +void XML_SetExternalEntityRefHandler(XML_Parser, XML_ExternalEntityRefHandler); +void XML_SetStartNamespaceDeclHandler(XML_Parser, XML_StartNamespaceDeclHandler); +void XML_SetEndNamespaceDeclHandler(XML_Parser, XML_EndNamespaceDeclHandler); +int 
XML_Parse(XML_Parser, const XML_Char *, int data_len, int is_final); +int XML_GetErrorCode(XML_Parser); +const XML_Char *XML_ErrorString(int); +int XML_GetCurrentLineNumber(XML_Parser); +int XML_GetCurrentColumnNumber(XML_Parser); +int XML_GetCurrentByteIndex(XML_Parser); +const XML_Char *XML_ExpatVersion(void); +void XML_ParserFree(XML_Parser); + +#define XML_ERROR_NONE 0 +#define XML_ERROR_NO_MEMORY 0 +#define XML_ERROR_SYNTAX 0 +#define XML_ERROR_NO_ELEMENTS 0 +#define XML_ERROR_INVALID_TOKEN 0 +#define XML_ERROR_UNCLOSED_TOKEN 0 +#define XML_ERROR_PARTIAL_CHAR 0 +#define XML_ERROR_TAG_MISMATCH 0 +#define XML_ERROR_DUPLICATE_ATTRIBUTE 0 +#define XML_ERROR_JUNK_AFTER_DOC_ELEMENT 0 +#define XML_ERROR_PARAM_ENTITY_REF 0 +#define XML_ERROR_UNDEFINED_ENTITY 0 +#define XML_ERROR_RECURSIVE_ENTITY_REF 0 +#define XML_ERROR_ASYNC_ENTITY 0 +#define XML_ERROR_BAD_CHAR_REF 0 +#define XML_ERROR_BINARY_ENTITY_REF 0 +#define XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF 0 +#define XML_ERROR_MISPLACED_XML_PI 0 +#define XML_ERROR_UNKNOWN_ENCODING 0 +#define XML_ERROR_INCORRECT_ENCODING 0 +#define XML_ERROR_UNCLOSED_CDATA_SECTION 0 +#define XML_ERROR_EXTERNAL_ENTITY_HANDLING 0 + +#else +#include <expat.h> +#endif /* HAVE_LIBEXPAT */ + +#endif /* PHP_EXPAT_COMPAT_H */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ diff --git a/ext/xml/php_xml.h b/ext/xml/php_xml.h index 7fef7c6070..ca77d3d224 100644 --- a/ext/xml/php_xml.h +++ b/ext/xml/php_xml.h @@ -22,20 +22,16 @@ #ifndef PHP_XML_H #define PHP_XML_H -#ifdef HAVE_LIBEXPAT - +#ifdef HAVE_XML extern zend_module_entry xml_module_entry; #define xml_module_ptr &xml_module_entry - #else - #define xml_module_ptr NULL - #endif -#if defined(PHP_XML_INTERNAL) +#ifdef HAVE_XML -#include <expat.h> +#include "ext/xml/expat_compat.h" #ifdef PHP_WIN32 #define PHP_XML_API __declspec(dllexport) diff --git a/ext/xml/xml.c b/ext/xml/xml.c index 15cae13b7b..04b654fae3 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -28,13 
+28,13 @@ #include "php.h" #define PHP_XML_INTERNAL -#include "php_xml.h" #include "zend_variables.h" #include "ext/standard/php_string.h" #include "ext/standard/info.h" -#if HAVE_LIBEXPAT +#if HAVE_XML +#include "php_xml.h" # include "ext/standard/head.h" /* Short-term TODO list: @@ -85,8 +85,8 @@ static int _xml_xmlcharlen(const XML_Char *); static void _xml_add_to_info(xml_parser *parser,char *name); inline static char *_xml_decode_tag(xml_parser *parser, const char *tag); -void _xml_startElementHandler(void *, const char *, const char **); -void _xml_endElementHandler(void *, const char *); +void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **); +void _xml_endElementHandler(void *, const XML_Char *); void _xml_characterDataHandler(void *, const XML_Char *, int); void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *); void _xml_defaultHandler(void *, const XML_Char *, int); @@ -202,6 +202,10 @@ PHP_MINIT_FUNCTION(xml) REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT); REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_CS|CONST_PERSISTENT); REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_CS|CONST_PERSISTENT); + +#ifdef LIBXML_EXPAT_COMPAT + xmlMemSetup(_efree, _emalloc, _erealloc, _estrdup); +#endif return SUCCESS; } @@ -615,7 +619,7 @@ static char *_xml_decode_tag(xml_parser *parser, const char *tag) /* }}} */ /* {{{ _xml_startElementHandler() */ -void _xml_startElementHandler(void *userData, const char *name, const char **attributes) +void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes) { xml_parser *parser = (xml_parser *)userData; const char **attrs = attributes; @@ -701,7 +705,7 @@ void _xml_startElementHandler(void *userData, const char *name, const char **att /* }}} */ /* {{{ _xml_endElementHandler() */ -void _xml_endElementHandler(void 
*userData, const char *name) +void _xml_endElementHandler(void *userData, const XML_Char *name) { xml_parser *parser = (xml_parser *)userData; char *tag_name; |