summaryrefslogtreecommitdiff
path: root/xml
diff options
context:
space:
mode:
authorNick Kew <niq@apache.org>2011-03-23 16:01:12 +0000
committerNick Kew <niq@apache.org>2011-03-23 16:01:12 +0000
commitf81f9d66310897b7f39ecd3a0f632230f5b0bcfe (patch)
treedd96f9b67af4604d9ff1fb315aadde156af411b9 /xml
parent825123d5ca81e39644430a0c3f84278eb79cc83c (diff)
downloadapr-f81f9d66310897b7f39ecd3a0f632230f5b0bcfe.tar.gz
Decouple apr_xml from reliance on Expat
Build with expat and it's effectively unchanged. The alternative build with libxml2 is compatible to the point that it passes the test suite, but shouldn't be considered ready for primetime! Various hacks want sorting out: this is a proof of concept. The choice of parser is made at compile time; a runtime choice would be nice, round tuits permitting. Build hacks & docs TBD. git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@1084621 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'xml')
-rw-r--r--xml/apr_xml.c127
-rw-r--r--xml/apr_xml_expat.c134
-rw-r--r--xml/apr_xml_internal.h50
-rw-r--r--xml/apr_xml_libxml2.c96
4 files changed, 290 insertions, 117 deletions
diff --git a/xml/apr_xml.c b/xml/apr_xml.c
index f79c80ce8..651e40f64 100644
--- a/xml/apr_xml.c
+++ b/xml/apr_xml.c
@@ -23,22 +23,15 @@
#include "apr_want.h"
#include "apr_xml.h"
-
-#if defined(HAVE_XMLPARSE_XMLPARSE_H)
-#include <xmlparse/xmlparse.h>
-#elif defined(HAVE_XMLTOK_XMLPARSE_H)
-#include <xmltok/xmlparse.h>
-#elif defined(HAVE_XML_XMLPARSE_H)
-#include <xml/xmlparse.h>
-#else
-#include <expat.h>
-#endif
+typedef void* XML_Parser; /* stand-in handle type: this file no longer includes any parser library header */
+typedef int XML_Error; /* stand-in for the backend's error-code type */
+typedef unsigned char XML_Char; /* NOTE(review): no use of XML_Char is visible in the lines this file keeps -- confirm it is still needed */
+#include "apr_xml_internal.h" /* declares struct apr_xml_parser in terms of XML_Parser/XML_Error, so the typedefs above must precede it */
#define DEBUG_CR "\r\n"
static const char APR_KW_xmlns[] = { 0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0' };
static const char APR_KW_xmlns_lang[] = { 0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0' };
-static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
/* errors related to namespace processing */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
@@ -51,21 +44,6 @@ static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
(name[2] == 0x4C || name[2] == 0x6C) )
-/* the real (internal) definition of the parser context */
-struct apr_xml_parser {
- apr_xml_doc *doc; /* the doc we're parsing */
- apr_pool_t *p; /* the pool we allocate from */
- apr_xml_elem *cur_elem; /* current element */
-
- int error; /* an error has occurred */
-#define APR_XML_ERROR_EXPAT 1
-#define APR_XML_ERROR_PARSE_DONE 2
-/* also: public APR_XML_NS_ERROR_* values (if any) */
-
- XML_Parser xp; /* the actual (Expat) XML parser */
- enum XML_Error xp_err; /* stored Expat error code */
-};
-
/* struct for scoping namespace declarations */
typedef struct apr_xml_ns_scope {
const char *prefix; /* prefix used for this ns */
@@ -139,7 +117,7 @@ static void start_handler(void *userdata, const char *name, const char **attrs)
elem->name = elem_name = apr_pstrdup(parser->p, name);
/* fill in the attributes (note: ends up in reverse order) */
- while (*attrs) {
+ while (attrs && *attrs) {
attr = apr_palloc(parser->p, sizeof(*attr));
attr->name = apr_pstrdup(parser->p, *attrs++);
attr->value = apr_pstrdup(parser->p, *attrs++);
@@ -336,111 +314,26 @@ static void cdata_handler(void *userdata, const char *data, int len)
apr_text_append(parser->p, hdr, s);
}
-static apr_status_t cleanup_parser(void *ctx)
-{
- apr_xml_parser *parser = ctx;
-
- XML_ParserFree(parser->xp);
- parser->xp = NULL;
-
- return APR_SUCCESS;
-}
-
-#if XML_MAJOR_VERSION > 1
-/* Stop the parser if an entity declaration is hit. */
-static void entity_declaration(void *userData, const XML_Char *entityName,
- int is_parameter_entity, const XML_Char *value,
- int value_length, const XML_Char *base,
- const XML_Char *systemId, const XML_Char *publicId,
- const XML_Char *notationName)
-{
- apr_xml_parser *parser = userData;
-
- XML_StopParser(parser->xp, XML_FALSE);
-}
-#else
-/* A noop default_handler. */
-static void default_handler(void *userData, const XML_Char *s, int len)
-{
-}
-#endif
-
APR_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
{
- apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
-
- parser->p = pool;
- parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
-
- parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
-
- /* ### is there a way to avoid hard-coding this? */
- apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
-
- parser->xp = XML_ParserCreate(NULL);
- if (parser->xp == NULL) {
- (*apr_pool_abort_get(pool))(APR_ENOMEM);
- return NULL;
- }
-
- apr_pool_cleanup_register(pool, parser, cleanup_parser,
- apr_pool_cleanup_null);
-
- XML_SetUserData(parser->xp, parser);
- XML_SetElementHandler(parser->xp, start_handler, end_handler);
- XML_SetCharacterDataHandler(parser->xp, cdata_handler);
-
- /* Prevent the "billion laughs" attack against expat by disabling
- * internal entity expansion. With 2.x, forcibly stop the parser
- * if an entity is declared - this is safer and a more obvious
- * failure mode. With older versions, installing a noop
- * DefaultHandler means that internal entities will be expanded as
- * the empty string, which is also sufficient to prevent the
- * attack. */
-#if XML_MAJOR_VERSION > 1
- XML_SetEntityDeclHandler(parser->xp, entity_declaration);
-#else
- XML_SetDefaultHandler(parser->xp, default_handler);
-#endif
-
- return parser;
-}
-
-static apr_status_t do_parse(apr_xml_parser *parser,
- const char *data, apr_size_t len,
- int is_final)
-{
- if (parser->xp == NULL) {
- parser->error = APR_XML_ERROR_PARSE_DONE;
- }
- else {
- int rv = XML_Parse(parser->xp, data, (int)len, is_final);
-
- if (rv == 0) {
- parser->error = APR_XML_ERROR_EXPAT;
- parser->xp_err = XML_GetErrorCode(parser->xp);
- }
- }
-
- /* ### better error code? */
- return parser->error ? APR_EGENERAL : APR_SUCCESS;
+ return apr_xml_parser_create_ex(pool, &start_handler, &end_handler, &cdata_handler);
}
APR_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
const char *data,
apr_size_t len)
{
- return do_parse(parser, data, len, 0 /* is_final */);
+ return parser->impl->Parse(parser, data, len, 0 /* is_final */);
}
APR_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
apr_xml_doc **pdoc)
{
char end;
- apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
+ apr_status_t status = parser->impl->Parse(parser, &end, 0, 1 /* is_final */);
/* get rid of the parser */
- (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
+ (void) apr_pool_cleanup_run(parser->p, parser, parser->impl->cleanup);
if (status)
return status;
@@ -476,7 +369,7 @@ APR_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
case APR_XML_ERROR_EXPAT:
(void) apr_snprintf(errbuf, errbufsize,
"XML parser error code: %s (%d)",
- XML_ErrorString(parser->xp_err), parser->xp_err);
+ parser->xp_msg, parser->xp_err);
return errbuf;
case APR_XML_ERROR_PARSE_DONE:
diff --git a/xml/apr_xml_expat.c b/xml/apr_xml_expat.c
new file mode 100644
index 000000000..6d986a6aa
--- /dev/null
+++ b/xml/apr_xml_expat.c
@@ -0,0 +1,134 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "apr.h"
+#include "apr_xml.h"
+
+typedef enum XML_Error XML_Error;
+#if defined(HAVE_XMLPARSE_XMLPARSE_H)
+#include <xmlparse/xmlparse.h>
+#elif defined(HAVE_XMLTOK_XMLPARSE_H)
+#include <xmltok/xmlparse.h>
+#elif defined(HAVE_XML_XMLPARSE_H)
+#include <xml/xmlparse.h>
+#else
+#include <expat.h>
+#endif
+
+
+#include "apr_xml_internal.h"
+
+/* Pool cleanup callback for the Expat backend.
+ *
+ * ctx is the apr_xml_parser the cleanup was registered with. The Expat
+ * parser it holds is freed and the handle is cleared, which is what
+ * do_parse() checks to detect a parser that has already been shut down.
+ * Always returns APR_SUCCESS.
+ */
+static apr_status_t cleanup_parser(void *ctx)
+{
+    apr_xml_parser *self = ctx;
+    XML_Parser handle = self->xp;
+
+    self->xp = NULL;
+    XML_ParserFree(handle);
+
+    return APR_SUCCESS;
+}
+/* Largest slice handed to Expat in a single call: XML_Parse() takes its
+ * length as an int, while callers pass an apr_size_t. */
+enum { APR_XML_EXPAT_MAX_CHUNK = 0x40000000 };
+
+/* Feed len bytes starting at data to the Expat parser.
+ *
+ * parser    the parser context; if its Expat handle has already been
+ *           released, APR_XML_ERROR_PARSE_DONE is recorded instead
+ * data/len  the input buffer; buffers larger than APR_XML_EXPAT_MAX_CHUNK
+ *           are fed in several slices rather than having their length
+ *           truncated by a cast to int
+ * is_final  non-zero when this is the last piece of the document; it is
+ *           only passed on with the final slice
+ *
+ * On an Expat failure the error code and its message are stored in the
+ * context. Returns APR_EGENERAL if the context holds any error (including
+ * one recorded by an earlier call), APR_SUCCESS otherwise.
+ */
+static apr_status_t do_parse(apr_xml_parser *parser,
+                             const char *data, apr_size_t len,
+                             int is_final)
+{
+    if (parser->xp == NULL) {
+        parser->error = APR_XML_ERROR_PARSE_DONE;
+    }
+    else {
+        int rv = 1;
+
+        /* Leading slices are never final; stop at the first failure. */
+        while (rv != 0 && len > APR_XML_EXPAT_MAX_CHUNK) {
+            rv = XML_Parse(parser->xp, data, APR_XML_EXPAT_MAX_CHUNK, 0);
+            data += APR_XML_EXPAT_MAX_CHUNK;
+            len -= APR_XML_EXPAT_MAX_CHUNK;
+        }
+
+        if (rv != 0) {
+            /* What is left now fits in an int. */
+            rv = XML_Parse(parser->xp, data, (int)len, is_final);
+        }
+
+        if (rv == 0) {
+            parser->error = APR_XML_ERROR_EXPAT;
+            parser->xp_err = XML_GetErrorCode(parser->xp);
+            parser->xp_msg = XML_ErrorString(parser->xp_err);
+        }
+    }
+
+    /* ### better error code? */
+    return parser->error ? APR_EGENERAL : APR_SUCCESS;
+}
+
+
+/* Operation table for the Expat backend; entries follow the member order
+ * of XMLParserImpl. */
+static XMLParserImpl xml_parser_expat = {
+    do_parse,       /* Parse */
+    cleanup_parser  /* cleanup */
+};
+
+/* Return the operation table of the backend compiled into this file
+ * (Expat). */
+XMLParserImpl* apr_xml_get_parser_impl(void)
+{
+    return &xml_parser_expat;
+}
+
+/* The string "DAV:" written out as character codes
+ * (0x44 'D', 0x41 'A', 0x56 'V', 0x3A ':'). */
+static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
+
+#if XML_MAJOR_VERSION > 1
+/* Entity-declaration callback used when XML_MAJOR_VERSION > 1.
+ * userData is the apr_xml_parser context; every other argument is
+ * ignored. Any entity declaration makes the parser stop, and
+ * XML_FALSE is passed as the second argument of XML_StopParser(). */
+static void entity_declaration(void *userData, const XML_Char *entityName,
+                               int is_parameter_entity, const XML_Char *value,
+                               int value_length, const XML_Char *base,
+                               const XML_Char *systemId, const XML_Char *publicId,
+                               const XML_Char *notationName)
+{
+    apr_xml_parser *ctx = userData;
+
+    XML_StopParser(ctx->xp, XML_FALSE);
+}
+#else
+/* Default handler used with older Expat versions: deliberately does
+ * nothing with the data it is given. */
+static void default_handler(void *userData, const XML_Char *s, int len)
+{
+}
+#endif
+
+/* Create a parser context backed by Expat.
+ *
+ * pool        pool the context and its document are allocated from; a
+ *             cleanup that frees the Expat parser is registered on it
+ * start_func  element-start callback, installed as Expat's start handler
+ * end_func    element-end callback, installed as Expat's end handler
+ * cdata_func  character-data callback
+ *
+ * Returns the new context. If Expat cannot create a parser, the pool's
+ * abort function (when one is set) is called with APR_ENOMEM and NULL is
+ * returned.
+ */
+APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
+    void *start_func, void *end_func, void *cdata_func)
+{
+    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
+
+    parser->impl = apr_xml_get_parser_impl();
+
+    parser->p = pool;
+    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
+
+    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
+
+    /* ### is there a way to avoid hard-coding this? */
+    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
+
+    parser->xp = XML_ParserCreate(NULL);
+    if (parser->xp == NULL) {
+        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);
+
+        /* A pool need not have an abort function; calling through a NULL
+         * pointer here would crash instead of reporting the failure. */
+        if (abort_fn != NULL) {
+            (void) abort_fn(APR_ENOMEM);
+        }
+        return NULL;
+    }
+
+    apr_pool_cleanup_register(pool, parser, cleanup_parser,
+                              apr_pool_cleanup_null);
+
+    XML_SetUserData(parser->xp, parser);
+
+    /* The callbacks arrive as void*; convert them explicitly to the
+     * handler types the Expat setters take. */
+    XML_SetElementHandler(parser->xp,
+                          (XML_StartElementHandler) start_func,
+                          (XML_EndElementHandler) end_func);
+    XML_SetCharacterDataHandler(parser->xp,
+                                (XML_CharacterDataHandler) cdata_func);
+
+    /* Prevent the "billion laughs" attack against expat by disabling
+     * internal entity expansion.  With 2.x, forcibly stop the parser
+     * if an entity is declared - this is safer and a more obvious
+     * failure mode.  With older versions, installing a noop
+     * DefaultHandler means that internal entities will be expanded as
+     * the empty string, which is also sufficient to prevent the
+     * attack. */
+#if XML_MAJOR_VERSION > 1
+    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
+#else
+    XML_SetDefaultHandler(parser->xp, default_handler);
+#endif
+
+    return parser;
+}
diff --git a/xml/apr_xml_internal.h b/xml/apr_xml_internal.h
new file mode 100644
index 000000000..573d1d591
--- /dev/null
+++ b/xml/apr_xml_internal.h
@@ -0,0 +1,50 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef APR_XML_INTERNAL_H
+#define APR_XML_INTERNAL_H
+
+
+struct XMLParserImpl { /* operation table supplied by the parser backend a build is compiled with */
+ apr_status_t (*Parse)(apr_xml_parser*, const char*, apr_size_t, int); /* feed a buffer of the given length; the int is the "is final" flag */
+ apr_status_t (*cleanup)(void*); /* pool cleanup callback; receives the apr_xml_parser as void* */
+};
+typedef struct XMLParserImpl XMLParserImpl;
+XMLParserImpl* apr_xml_get_parser_impl(void); /* returns the backend's operation table; defined once per backend source file */
+
+
+/* the real (internal) definition of the parser context; XML_Parser and XML_Error must be typedef'd by the including file before this header is included */
+struct apr_xml_parser {
+ apr_xml_doc *doc; /* the doc we're parsing */
+ apr_pool_t *p; /* the pool we allocate from */
+ apr_xml_elem *cur_elem; /* current element */
+
+ int error; /* an error has occurred */
+#define APR_XML_ERROR_EXPAT 1 /* the backend parser reported an error (also used by the libxml2 backend, despite the name) */
+#define APR_XML_ERROR_PARSE_DONE 2 /* parsing was attempted after the backend parser was released */
+/* also: public APR_XML_NS_ERROR_* values (if any) */
+
+ XML_Parser xp; /* the backend parser handle (Expat or libxml2); NULL once cleaned up */
+ XML_Error xp_err; /* stored backend error code */
+ const char *xp_msg; /* message text for xp_err, set by the backend when a parse fails */
+ XMLParserImpl *impl; /* operation table of the backend that created this parser */
+};
+
+
+
+/* Backend constructor: creates a parser context allocated from pool and
+ * wires the three callbacks (element start, element end, character data)
+ * into the backend parser. Declared with APR_DECLARE so the prototype
+ * agrees with the backend definitions, which are written with the same
+ * macro. */
+APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
+    void *start_func, void *end_func, void *cdata_func);
+
+#endif
diff --git a/xml/apr_xml_libxml2.c b/xml/apr_xml_libxml2.c
new file mode 100644
index 000000000..a765f2359
--- /dev/null
+++ b/xml/apr_xml_libxml2.c
@@ -0,0 +1,96 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "apr.h"
+#include "apr_strings.h"
+#include "apr_xml.h"
+
+#include <libxml/parser.h>
+#include <libxml/xmlerror.h>
+
+typedef xmlParserCtxtPtr XML_Parser;
+typedef xmlParserErrors XML_Error;
+
+#include "apr_xml_internal.h"
+
+/* Pool cleanup callback for the libxml2 backend.
+ *
+ * ctx is the apr_xml_parser the cleanup was registered with. Its libxml2
+ * parser context is freed and the handle is cleared.
+ * Always returns APR_SUCCESS.
+ */
+static apr_status_t cleanup_parser(void *ctx)
+{
+    apr_xml_parser *self = ctx;
+    XML_Parser handle = self->xp;
+
+    self->xp = NULL;
+    xmlFreeParserCtxt(handle);
+
+    return APR_SUCCESS;
+}
+/* Largest slice handed to libxml2 in a single call: xmlParseChunk() takes
+ * its size as an int, while callers pass an apr_size_t. */
+enum { APR_XML_LIBXML2_MAX_CHUNK = 0x40000000 };
+
+/* Feed sz bytes starting at data to the libxml2 push parser.
+ *
+ * parser   the parser context; if its libxml2 handle has already been
+ *          released, APR_XML_ERROR_PARSE_DONE is recorded instead of
+ *          touching the freed handle
+ * data/sz  the input buffer; buffers larger than APR_XML_LIBXML2_MAX_CHUNK
+ *          are fed in several slices
+ * final    non-zero when this is the last piece of the document; it is
+ *          only passed on with the final slice
+ *
+ * On a parser failure the libxml2 error code and a pool-owned copy of the
+ * message are stored in the context. Returns APR_EGENERAL if the context
+ * holds any error, APR_SUCCESS otherwise -- the same contract as the Expat
+ * backend, rather than leaking raw libxml2 codes as apr_status_t.
+ */
+static apr_status_t libxml2_parse(apr_xml_parser* parser, const char* data,
+                                  apr_size_t sz, int final)
+{
+    if (parser->xp == NULL) {
+        parser->error = APR_XML_ERROR_PARSE_DONE;
+    }
+    else {
+        int rv = 0;
+
+        /* Leading slices are never final; stop at the first failure. */
+        while (rv == 0 && sz > APR_XML_LIBXML2_MAX_CHUNK) {
+            rv = xmlParseChunk(parser->xp, data, APR_XML_LIBXML2_MAX_CHUNK, 0);
+            data += APR_XML_LIBXML2_MAX_CHUNK;
+            sz -= APR_XML_LIBXML2_MAX_CHUNK;
+        }
+
+        if (rv == 0) {
+            /* What is left now fits in an int. */
+            rv = xmlParseChunk(parser->xp, data, (int)sz, final);
+        }
+
+        if (rv != 0) {
+            const xmlError *errptr = xmlCtxtGetLastError(parser->xp);
+
+            /* this misnomer is used as a test for (any) parser error. */
+            parser->error = APR_XML_ERROR_EXPAT;
+            parser->xp_err = rv;
+
+            /* The message text belongs to the parser context, which
+             * apr_xml_parser_done() frees before the message is read
+             * back, so keep a copy in the parser's pool. There may also
+             * be no recorded error at all. */
+            if (errptr != NULL && errptr->message != NULL) {
+                parser->xp_msg = apr_pstrdup(parser->p, errptr->message);
+            }
+            else {
+                parser->xp_msg = "unknown libxml2 error";
+            }
+        }
+    }
+
+    /* ### better error code? */
+    return parser->error ? APR_EGENERAL : APR_SUCCESS;
+}
+/* The string "DAV:" written out as character codes
+ * (0x44 'D', 0x41 'A', 0x56 'V', 0x3A ':'). */
+static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
+
+/* Operation table for the libxml2 backend; entries follow the member
+ * order of XMLParserImpl. */
+static XMLParserImpl xml_parser_libxml2 = {
+    libxml2_parse,  /* Parse */
+    cleanup_parser  /* cleanup */
+};
+
+/* Return the operation table of the backend compiled into this file
+ * (libxml2). */
+XMLParserImpl* apr_xml_get_parser_impl(void)
+{
+    XMLParserImpl *impl = &xml_parser_libxml2;
+
+    return impl;
+}
+
+
+/* Create a parser context backed by a libxml2 push parser.
+ *
+ * pool        pool the context, its document and the SAX handler table
+ *             are allocated from; a cleanup that frees the libxml2 parser
+ *             context is registered on it
+ * start_func  element-start callback, stored as the SAX startElement hook
+ * end_func    element-end callback, stored as the SAX endElement hook
+ * cdata_func  character-data callback, stored as the SAX characters hook
+ *
+ * The context itself is passed to libxml2 as the SAX user data. Returns
+ * the new context. If libxml2 cannot create a parser context, the pool's
+ * abort function (when one is set) is called with APR_ENOMEM and NULL is
+ * returned.
+ */
+APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
+    void *start_func, void *end_func, void *cdata_func)
+{
+    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
+    /* FIXME: This is a mismatch.  We should create a single global
+     * sax instance and re-use it for every parser.  That means we
+     * need an up-front initialisation function.
+     */
+    xmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(xmlSAXHandler));
+
+    /* The callbacks arrive as void*; convert them explicitly to the
+     * SAX callback types of the members they are stored in. */
+    sax->startElement = (startElementSAXFunc) start_func;
+    sax->endElement = (endElementSAXFunc) end_func;
+    sax->characters = (charactersSAXFunc) cdata_func;
+    sax->initialized = 1;
+
+    parser->impl = apr_xml_get_parser_impl();
+
+    parser->p = pool;
+    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
+
+    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
+
+    /* ### is there a way to avoid hard-coding this? */
+    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
+
+    parser->xp = xmlCreatePushParserCtxt(sax, parser, NULL, 0, NULL);
+    if (parser->xp == NULL) {
+        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);
+
+        /* A pool need not have an abort function; calling through a NULL
+         * pointer here would crash instead of reporting the failure. */
+        if (abort_fn != NULL) {
+            (void) abort_fn(APR_ENOMEM);
+        }
+        return NULL;
+    }
+
+    apr_pool_cleanup_register(pool, parser, cleanup_parser,
+                              apr_pool_cleanup_null);
+
+    return parser;
+}