diff options
author | joe <joe@61a7d7f5-40b7-0310-9c16-bb0ea8cb1845> | 2004-10-02 18:47:02 +0000 |
---|---|---|
committer | joe <joe@61a7d7f5-40b7-0310-9c16-bb0ea8cb1845> | 2004-10-02 18:47:02 +0000 |
commit | 0294ff3d3282d1b1c5497f00ea25e5e55e6f4338 (patch) | |
tree | 978af6f81c7b7715597871b1e89a9ad083907f1a /src/ne_xml.c | |
download | neon-0294ff3d3282d1b1c5497f00ea25e5e55e6f4338.tar.gz |
Import neon 0.24.0 to begin 0.24.x branch.
git-svn-id: http://svn.webdav.org/repos/projects/neon/branches/0.24.x@243 61a7d7f5-40b7-0310-9c16-bb0ea8cb1845
Diffstat (limited to 'src/ne_xml.c')
-rw-r--r-- | src/ne_xml.c | 605 |
1 files changed, 605 insertions, 0 deletions
diff --git a/src/ne_xml.c b/src/ne_xml.c new file mode 100644 index 0000000..d55d59c --- /dev/null +++ b/src/ne_xml.c @@ -0,0 +1,605 @@ +/* + Higher Level Interface to XML Parsers. + Copyright (C) 1999-2003, Joe Orton <joe@manyfish.co.uk> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + MA 02111-1307, USA + +*/ + +#include "config.h" + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#include "ne_i18n.h" + +#include "ne_alloc.h" +#include "ne_xml.h" +#include "ne_utils.h" +#include "ne_string.h" + +#if defined(HAVE_EXPAT) +/* expat support: */ +#ifdef HAVE_XMLPARSE_H +#include "xmlparse.h" +#else +#include <expat.h> +#endif +typedef XML_Char ne_xml_char; +#elif defined(HAVE_LIBXML) +/* libxml2 support: */ +#include <libxml/xmlversion.h> +#include <libxml/parser.h> +typedef xmlChar ne_xml_char; + +#else /* not HAVE_LIBXML */ +# error need an XML parser +#endif /* not HAVE_EXPAT */ + +/* Approx. one screen of text: */ +#define ERR_SIZE (2048) + +struct handler { + ne_xml_startelm_cb *startelm_cb; /* start-element callback */ + ne_xml_endelm_cb *endelm_cb; /* end-element callback */ + ne_xml_cdata_cb *cdata_cb; /* character-data callback. */ + void *userdata; /* userdata for the above. */ + struct handler *next; /* next handler in stack. */ +}; + +#ifdef HAVE_LIBXML +static void sax_error(void *ctx, const char *msg, ...); +#endif + +struct element { + const ne_xml_char *nspace; + ne_xml_char *name; + + int state; /* opaque state integer */ + + /* Namespaces declared in this element */ + ne_xml_char *default_ns; /* A default namespace */ + struct namespace *nspaces; /* List of other namespace scopes */ + + struct handler *handler; /* Handler for this element */ + + struct element *parent; /* parent element, or NULL */ +}; + +/* We pass around a ne_xml_parser as the userdata in the parsing + * library. This maintains the current state of the parse and various + * other bits and bobs. Within the parse, we store the current branch + * of the tree, i.e., the current element and all its parents, up to + * the root, but nothing other than that. */ +struct ne_xml_parser_s { + struct element *root; /* the root of the document */ + struct element *current; /* current element in the branch */ + struct handler *top_handlers; /* always points at the + * handler on top of the stack. */ + int valid; /* non-zero whilst parse should continue */ + int prune; /* if non-zero, depth within a dead branch */ + +#ifdef HAVE_EXPAT + XML_Parser parser; + char *encoding; +#else + xmlParserCtxtPtr parser; +#endif + char error[ERR_SIZE]; +}; + +/* The callback handlers */ +static void start_element(void *userdata, const ne_xml_char *name, const ne_xml_char **atts); +static void end_element(void *userdata, const ne_xml_char *name); +static void char_data(void *userdata, const ne_xml_char *cdata, int len); +static const char *resolve_nspace(const struct element *elm, + const char *prefix, size_t pfxlen); + +/* Linked list of namespace scopes */ +struct namespace { + ne_xml_char *name; + ne_xml_char *uri; + struct namespace *next; +}; + +#ifdef HAVE_LIBXML + +/* Could be const as far as we care, but libxml doesn't want that */ +static xmlSAXHandler sax_handler = { + NULL, /* internalSubset */ + NULL, /* isStandalone */ + NULL, /* hasInternalSubset */ + NULL, /* hasExternalSubset */ + NULL, /* resolveEntity */ + NULL, /* getEntity */ + NULL, /* entityDecl */ + NULL, /* notationDecl */ + NULL, /* attributeDecl */ + NULL, /* elementDecl */ + NULL, /* unparsedEntityDecl */ + NULL, /* setDocumentLocator */ + NULL, /* startDocument */ + NULL, /* endDocument */ + start_element, /* startElement */ + end_element, /* endElement */ + NULL, /* reference */ + char_data, /* characters */ + NULL, /* ignorableWhitespace */ + NULL, /* processingInstruction */ + NULL, /* comment */ + NULL, /* xmlParserWarning */ + sax_error, /* xmlParserError */ + sax_error, /* fatal error (never called by libxml2?) */ + NULL, /* getParameterEntity */ + char_data /* cdataBlock */ +}; + +/* empty attributes array to mimic expat behaviour */ +static const char *empty_atts[] = {NULL, NULL}; + +/* macro for determining the attributes array to pass */ +#define PASS_ATTS(atts) (atts ? (const char **)(atts) : empty_atts) + +#else + +#define PASS_ATTS(atts) ((const char **)(atts)) + +/* XML declaration callback for expat. */ +static void decl_handler(void *userdata, + const XML_Char *version, const XML_Char *encoding, + int standalone) +{ + ne_xml_parser *p = userdata; + if (encoding) p->encoding = ne_strdup(encoding); +} + +#endif /* HAVE_LIBXML */ + +int ne_xml_currentline(ne_xml_parser *p) +{ +#ifdef HAVE_EXPAT + return XML_GetCurrentLineNumber(p->parser); +#else + return p->parser->input->line; +#endif +} + +const char *ne_xml_doc_encoding(const ne_xml_parser *p) +{ +#ifdef HAVE_LIBXML + return p->parser->encoding; +#else + return p->encoding; +#endif +} + +/* Extract the namespace prefix declarations from 'atts'. */ +static int declare_nspaces(ne_xml_parser *p, struct element *elm, + const ne_xml_char **atts) +{ + int n; + + for (n = 0; atts && atts[n]; n += 2) { + if (strcasecmp(atts[n], "xmlns") == 0) { + /* New default namespace */ + elm->default_ns = ne_strdup(atts[n+1]); + } else if (strncasecmp(atts[n], "xmlns:", 6) == 0) { + struct namespace *ns; + + if (atts[n][6] == '\0' || atts[n+1][0] == '\0') { + ne_snprintf(p->error, ERR_SIZE, + ("XML parse error at line %d: invalid namespace " + "declaration"), ne_xml_currentline(p)); + return -1; + } + + /* New namespace scope */ + ns = ne_calloc(sizeof(*ns)); + ns->next = elm->nspaces; + elm->nspaces = ns; + ns->name = ne_strdup(atts[n]+6); /* skip the xmlns= */ + ns->uri = ne_strdup(atts[n+1]); + } + } + + return 0; +} + +/* Expand an XML qualified name, which may include a namespace prefix + * as well as the local part. */ +static int expand_qname(ne_xml_parser *p, struct element *elm, + const ne_xml_char *qname) +{ + const ne_xml_char *pfx; + + pfx = strchr(qname, ':'); + if (pfx == NULL) { + struct element *e = elm; + + /* Find default namespace; guaranteed to terminate as the root + * element always has default_ns="". */ + while (e->default_ns == NULL) + e = e->parent; + + elm->name = ne_strdup(qname); + elm->nspace = e->default_ns; + } else { + const char *uri = resolve_nspace(elm, qname, pfx-qname); + + if (uri) { + /* The name is everything after the ':' */ + if (pfx[1] == '\0') { + ne_snprintf(p->error, ERR_SIZE, + ("XML parse error at line %d: element name missing" + "after namespace prefix"), ne_xml_currentline(p)); + return -1; + } + elm->name = ne_strdup(pfx+1); + elm->nspace = uri; + } else { + ne_snprintf(p->error, ERR_SIZE, + ("XML parse error at line %d: undeclared namespace"), + ne_xml_currentline(p)); + return -1; + } + } + return 0; +} + +/* Called with the start of a new element. */ +static void start_element(void *userdata, const ne_xml_char *name, + const ne_xml_char **atts) +{ + ne_xml_parser *p = userdata; + struct element *elm; + struct handler *hand; + int state = NE_XML_DECLINE; + + if (!p->valid) return; + + if (p->prune) { + p->prune++; + return; + } + + /* Create a new element */ + elm = ne_calloc(sizeof *elm); + elm->parent = p->current; + p->current = elm; + + if (declare_nspaces(p, elm, atts) || expand_qname(p, elm, name)) { + p->valid = 0; + return; + } + + /* Find a handler which will accept this element (or abort the parse) */ + for (hand = elm->parent->handler; hand && state == NE_XML_DECLINE; + hand = hand->next) { + elm->handler = hand; + state = hand->startelm_cb(hand->userdata, elm->parent->state, + elm->nspace, elm->name, PASS_ATTS(atts)); + } + + NE_DEBUG(NE_DBG_XMLPARSE, "XML: start-element (%d, {%s, %s}) => %d\n", + elm->parent->state, elm->nspace, elm->name, state); + + if (state > 0) + elm->state = state; + else if (state == NE_XML_DECLINE) + /* prune this branch. */ + p->prune++; + else /* state == NE_XML_ABORT */ + p->valid = 0; +} + +/* Destroys an element structure. */ +static void destroy_element(struct element *elm) +{ + struct namespace *this_ns, *next_ns; + ne_free(elm->name); + /* Free the namespaces */ + this_ns = elm->nspaces; + while (this_ns != NULL) { + next_ns = this_ns->next; + ne_free(this_ns->name); + ne_free(this_ns->uri); + ne_free(this_ns); + this_ns = next_ns; + }; + if (elm->default_ns) + ne_free(elm->default_ns); + ne_free(elm); +} + +/* cdata SAX callback */ +static void char_data(void *userdata, const ne_xml_char *data, int len) +{ + ne_xml_parser *p = userdata; + struct element *elm = p->current; + + if (!p->valid || p->prune) return; + + if (elm->handler->cdata_cb && + elm->handler->cdata_cb(elm->handler->userdata, elm->state, data, len)) { + NE_DEBUG(NE_DBG_XML, "Cdata callback failed.\n"); + p->valid = 0; + } +} + +/* Called with the end of an element */ +static void end_element(void *userdata, const ne_xml_char *name) +{ + ne_xml_parser *p = userdata; + struct element *elm = p->current; + + if (!p->valid) return; + + if (p->prune) { + if (p->prune-- > 1) return; + } else if (elm->handler->endelm_cb && + elm->handler->endelm_cb(elm->handler->userdata, elm->state, + elm->nspace, elm->name)) { + NE_DEBUG(NE_DBG_XML, "XML: end-element for %d failed.\n", elm->state); + p->valid = 0; + } + + NE_DEBUG(NE_DBG_XMLPARSE, "XML: end-element (%d, {%s, %s})\n", + elm->state, elm->nspace, elm->name); + + /* move back up the tree */ + p->current = elm->parent; + p->prune = 0; + + destroy_element(elm); +} + +/* Find a namespace definition for 'prefix' in given element, where + * length of prefix is 'pfxlen'. Returns the URI or NULL. */ +static const char *resolve_nspace(const struct element *elm, + const char *prefix, size_t pfxlen) +{ + const struct element *s; + + /* Search up the tree. */ + for (s = elm; s != NULL; s = s->parent) { + const struct namespace *ns; + /* Iterate over defined spaces on this node. */ + for (ns = s->nspaces; ns != NULL; ns = ns->next) { + if (strlen(ns->name) == pfxlen && + memcmp(ns->name, prefix, pfxlen) == 0) + return ns->uri; + } + } + + return NULL; +} + +ne_xml_parser *ne_xml_create(void) +{ + ne_xml_parser *p = ne_calloc(sizeof *p); + /* Initialize other stuff */ + p->valid = 1; + /* Placeholder for the root element */ + p->current = p->root = ne_calloc(sizeof *p->root); + p->root->default_ns = ""; + p->root->state = 0; +#ifdef HAVE_EXPAT + p->parser = XML_ParserCreate(NULL); + if (p->parser == NULL) { + abort(); + } + XML_SetElementHandler(p->parser, start_element, end_element); + XML_SetCharacterDataHandler(p->parser, char_data); + XML_SetUserData(p->parser, (void *) p); + XML_SetXmlDeclHandler(p->parser, decl_handler); +#else + p->parser = xmlCreatePushParserCtxt(&sax_handler, + (void *)p, NULL, 0, NULL); + if (p->parser == NULL) { + abort(); + } + p->parser->replaceEntities = 1; +#endif + return p; +} + +void ne_xml_push_handler(ne_xml_parser *p, + ne_xml_startelm_cb *startelm_cb, + ne_xml_cdata_cb *cdata_cb, + ne_xml_endelm_cb *endelm_cb, + void *userdata) +{ + struct handler *hand = ne_calloc(sizeof(struct handler)); + + hand->startelm_cb = startelm_cb; + hand->cdata_cb = cdata_cb; + hand->endelm_cb = endelm_cb; + hand->userdata = userdata; + + /* If this is the first handler registered, update the + * base pointer too. */ + if (p->top_handlers == NULL) { + p->root->handler = hand; + p->top_handlers = hand; + } else { + p->top_handlers->next = hand; + p->top_handlers = hand; + } +} + +void ne_xml_parse_v(void *userdata, const char *block, size_t len) +{ + ne_xml_parser *p = userdata; + /* FIXME: The two XML parsers break all our nice abstraction by + * choosing different char *'s. The swine. This cast will come + * back and bite us someday, no doubt. */ + ne_xml_parse(p, block, len); +} + +/* Parse the given block of input of length len */ +void ne_xml_parse(ne_xml_parser *p, const char *block, size_t len) +{ + int ret, flag; + /* duck out if it's broken */ + if (!p->valid) { + NE_DEBUG(NE_DBG_XML, "Not parsing %" NE_FMT_SIZE_T " bytes.\n", + len); + return; + } + if (len == 0) { + flag = -1; + block = ""; + NE_DEBUG(NE_DBG_XML, "Got 0-length buffer, end of document.\n"); + } else { + NE_DEBUG(NE_DBG_XML, "Parsing %" NE_FMT_SIZE_T " length buffer.\n", + len); + flag = 0; + } + /* Note, don't write a parser error if !p->valid, since an error + * will already have been written in that case. */ +#ifdef HAVE_EXPAT + ret = XML_Parse(p->parser, block, len, flag); + NE_DEBUG(NE_DBG_XMLPARSE, "XML_Parse returned %d\n", ret); + if (ret == 0 && p->valid) { + ne_snprintf(p->error, ERR_SIZE, + "XML parse error at line %d: %s", + XML_GetCurrentLineNumber(p->parser), + XML_ErrorString(XML_GetErrorCode(p->parser))); + p->valid = 0; + } +#else + ret = xmlParseChunk(p->parser, block, len, flag); + NE_DEBUG(NE_DBG_XMLPARSE, "xmlParseChunk returned %d\n", ret); + /* Parse errors are normally caught by the sax_error() callback, + * which clears p->valid. */ + if (p->parser->errNo && p->valid) { + ne_snprintf(p->error, ERR_SIZE, "XML parse error at line %d.", + ne_xml_currentline(p)); + p->valid = 0; + } +#endif +} + +int ne_xml_valid(ne_xml_parser *p) +{ + return p->valid; +} + +void ne_xml_destroy(ne_xml_parser *p) +{ + struct element *elm, *parent; + struct handler *hand, *next; + + /* Free up the handlers on the stack: the root element has the + * pointer to the base of the handler stack. */ + for (hand = p->root->handler; hand!=NULL; hand=next) { + next = hand->next; + ne_free(hand); + } + + /* Clean up remaining elements */ + for (elm = p->current; elm != p->root; elm = parent) { + parent = elm->parent; + destroy_element(elm); + } + + /* free root element */ + ne_free(p->root); + +#ifdef HAVE_EXPAT + XML_ParserFree(p->parser); + if (p->encoding) ne_free(p->encoding); +#else + xmlFreeParserCtxt(p->parser); +#endif + + ne_free(p); +} + +void ne_xml_set_error(ne_xml_parser *p, const char *msg) +{ + ne_snprintf(p->error, ERR_SIZE, msg); +} + +#ifdef HAVE_LIBXML +static void sax_error(void *ctx, const char *msg, ...) +{ + ne_xml_parser *p = ctx; + va_list ap; + char buf[1024]; + + va_start(ap, msg); + ne_vsnprintf(buf, 1024, msg, ap); + va_end(ap); + + ne_snprintf(p->error, ERR_SIZE, + _("XML parse error at line %d: %s."), + p->parser->input->line, buf); + + p->valid = 0; +} +#endif + +const char *ne_xml_get_error(ne_xml_parser *p) +{ + return p->error; +} + +const char * +ne_xml_get_attr(ne_xml_parser *p, const char **attrs, + const char *nspace, const char *name) +{ + int n; + + for (n = 0; attrs[n] != NULL; n += 2) { + char *pnt = strchr(attrs[n], ':'); + + if (!nspace && !pnt && strcmp(attrs[n], name) == 0) { + return attrs[n+1]; + } else if (nspace && pnt) { + /* If a namespace is given, and the local part matches, + * then resolve the namespace and compare that too. */ + if (strcmp(pnt + 1, name) == 0) { + const char *uri = resolve_nspace(p->current, + attrs[n], pnt - attrs[n]); + if (uri && strcmp(uri, nspace) == 0) + return attrs[n+1]; + } + } + } + + return NULL; +} + +int ne_xml_mapid(const struct ne_xml_idmap map[], size_t maplen, + const char *nspace, const char *name) +{ + size_t n; + + for (n = 0; n < maplen; n++) + if (strcmp(name, map[n].name) == 0 && + strcmp(nspace, map[n].nspace) == 0) + return map[n].id; + + return 0; +} |