/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "apr.h"

#if APU_USE_XMLLITE
#include "apr_arch_utf8.h"
#include "apr_xml.h"

typedef struct xmllite_parser_s* XML_Parser;
typedef int XML_Error;

#include "apr_xml_internal.h"

/* Use the C flavour of the COM declarations (explicit lpVtbl) together
 * with the IXmlReader_xxx() / ISequentialStream_xxx() helper macros that
 * the rest of this file relies on.
 */
#define CINTERFACE
#define COBJMACROS
/* NOTE(review): the two definitions below look like shims that let
 * xmllite.h compile against a reduced set of Windows headers -- confirm
 * before removing them.
 */
#define interface struct
typedef void * LPMSG;
#include <xmllite.h>

#include "apr_xml_internal.h"

/* In-memory ISequentialStream implementation that feeds the XmlLite reader
 * with the chunk most recently handed to do_parse().
 *
 * The COM interface is the first member; the method implementations map
 * the interface pointer back to the object with CONTAINING_RECORD().
 */
typedef struct xml_stream_t {
    ISequentialStream sequental_stream; /* COM interface (holds lpVtbl) */
    ULONG refcount;                     /* COM reference count */
    const char *data;                   /* next unread byte of the chunk */
    apr_size_t remaining;               /* unread bytes left at 'data' */
    int is_final;                       /* non-zero: no more chunks follow */
} xml_stream_t;

/* Backend state hanging off apr_xml_parser::xp. */
struct xmllite_parser_s {
    IXmlReader *xml_reader;     /* XmlLite reader, released in cleanup_parser() */
    xml_stream_t *input_stream; /* stream installed as the reader's input */
    apr_pool_t *iterpool;       /* cleared before every state-machine step */
    /* Current state of the parsing state machine: read_state or cdata_state. */
    apr_status_t (*current_state)(apr_xml_parser *parser,
                                  apr_pool_t *scratch_pool);
    /* Callbacks supplied to apr_xml_parser_create_internal(). */
    void (*start_func)(void *userdata, const char *name, const char **attrs);
    void (*end_func)(void *userdata, const char *name);
    void (*cdata_func)(void *userdata, const char *data, int len);
};

/* IUnknown::QueryInterface: only IUnknown and ISequentialStream are
 * supported.  On success the reference count is incremented and the
 * interface pointer returned; otherwise *ppvObject is set to NULL.
 */
static HRESULT STDMETHODCALLTYPE stream_QueryInterface(ISequentialStream * This,
                                                       REFIID riid,
                                                       void **ppvObject)
{
    xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t,
                                          sequental_stream);

    if (IsEqualIID(riid, &IID_IUnknown) ||
        IsEqualIID(riid, &IID_ISequentialStream)) {
        InterlockedIncrement(&obj->refcount);
        *ppvObject = &obj->sequental_stream;
        return S_OK;
    }
    else {
        *ppvObject = NULL;
        return E_NOINTERFACE;
    }
}

/* IUnknown::AddRef: increments and returns the reference count. */
static ULONG STDMETHODCALLTYPE stream_AddRef(ISequentialStream * This)
{
    xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t,
                                          sequental_stream);

    return InterlockedIncrement(&obj->refcount);
}

/* IUnknown::Release: decrements the reference count and frees the object
 * (allocated with malloc() in create_xml_stream()) when it reaches zero.
 */
static ULONG STDMETHODCALLTYPE stream_Release(ISequentialStream * This)
{
    xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t,
                                          sequental_stream);
    ULONG refcount = InterlockedDecrement(&obj->refcount);

    if (refcount == 0) {
        free(obj);
    }

    return refcount;
}

/* ISequentialStream::Read: copies up to 'cb' bytes of the current chunk
 * into 'pv' and advances the chunk cursor.
 *
 * Returns S_OK when the whole request was satisfied, E_PENDING when the
 * chunk ran dry but more chunks are expected (is_final == 0), and S_FALSE
 * when the chunk ran dry on the final chunk.
 */
static HRESULT STDMETHODCALLTYPE stream_Read(ISequentialStream * This,
                                             void *pv, ULONG cb,
                                             ULONG *pcbRead)
{
    xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t,
                                          sequental_stream);
    ULONG read = cb;

    if (read > obj->remaining) {
        read = (ULONG) obj->remaining;
    }

    /* obj->data is NULL until the first do_parse() call and may be NULL
     * for an empty chunk, so never hand it to memcpy() for a zero-length
     * copy.
     */
    if (read > 0) {
        memcpy(pv, obj->data, read);
        obj->data += read;
        obj->remaining -= read;
    }

    /* COM allows the caller to pass NULL when it does not need the count. */
    if (pcbRead) {
        *pcbRead = read;
    }

    if (read == cb) {
        return S_OK;
    }
    else if (read < cb && !obj->is_final) {
        return E_PENDING;
    }
    else {
        return S_FALSE;
    }
}

/* ISequentialStream::Write: the stream is read-only. */
static HRESULT STDMETHODCALLTYPE stream_Write(ISequentialStream * This,
                                              const void *pv, ULONG cb,
                                              ULONG *pcbWritten)
{
    (void) This;
    (void) pv;
    (void) cb;
    (void) pcbWritten;

    return E_NOTIMPL;
}

/* Method table shared by every xml_stream_t instance. */
static ISequentialStreamVtbl stream_vtable = {
    stream_QueryInterface,
    stream_AddRef,
    stream_Release,
    stream_Read,
    stream_Write
};

/* Pool cleanup: drops the reference taken in create_xml_stream(). */
static apr_status_t cleanup_xml_stream(void *ctx)
{
    xml_stream_t *xml_stream = ctx;

    ISequentialStream_Release(&xml_stream->sequental_stream);

    return APR_SUCCESS;
}

/* Creates a zero-initialised stream object with a reference count of one
 * and registers a cleanup on 'pool' that releases that reference.
 *
 * The object is allocated with malloc() rather than from the pool because
 * stream_Release() free()s it once the last reference is gone.
 *
 * Returns NULL if the allocation fails.
 */
static xml_stream_t *create_xml_stream(apr_pool_t *pool)
{
    xml_stream_t *xml_stream = malloc(sizeof(*xml_stream));

    if (xml_stream == NULL) {
        return NULL;
    }

    memset(xml_stream, 0, sizeof(*xml_stream));
    xml_stream->sequental_stream.lpVtbl = &stream_vtable;
    xml_stream->refcount = 1;

    apr_pool_cleanup_register(pool, xml_stream, cleanup_xml_stream,
                              apr_pool_cleanup_null);

    return xml_stream;
}

/* Releases the XmlLite reader.  The pointer is reset to NULL so that a
 * second invocation is a no-op; this function is both registered as a
 * pool cleanup and exposed through the XMLParserImpl table below.
 */
static apr_status_t cleanup_parser(void *ctx)
{
    apr_xml_parser *parser = ctx;

    if (parser->xp->xml_reader) {
        IXmlReader_Release(parser->xp->xml_reader);
        parser->xp->xml_reader = NULL;
    }

    return APR_SUCCESS;
}

/* Converts 'wlen' UTF-16 code units at 'wstr' into a NUL-terminated UTF-8
 * string allocated from 'pool'.
 *
 * @param utf_p     receives the converted string ("" when wlen == 0)
 * @param utf_len_p if not NULL, receives the length in bytes, excluding
 *                  the terminating NUL
 * @return APR_SUCCESS, APR_ENOMEM, or the status reported by
 *         apr_conv_utf16_to_utf8()
 */
static apr_status_t wstr2utf(const char **utf_p, apr_size_t *utf_len_p,
                             LPCWSTR wstr, apr_size_t wlen,
                             apr_pool_t *pool)
{
    apr_size_t result_len;
    char *result;

    if (wlen > 0) {
        apr_status_t status;
        /* Reserve three output bytes per input code unit. */
        apr_size_t bufsize = wlen * 3;
        apr_size_t outbytes = bufsize;

        result = apr_palloc(pool, outbytes + 1);
        if (!result) {
            return APR_ENOMEM;
        }

        status = apr_conv_utf16_to_utf8(wstr, &wlen, result, &outbytes);
        if (status) {
            return status;
        }

        /* 'outbytes' now holds the unused space left in the buffer. */
        result_len = bufsize - outbytes;
        result[result_len] = 0;
    }
    else {
        result = "";
        result_len = 0;
    }

    *utf_p = result;
    if (utf_len_p) {
        *utf_len_p = result_len;
    }

    return APR_SUCCESS;
}

/* Maps an XmlLite error HRESULT to a static English message.  Unknown
 * codes map to the empty string.
 */
static char * get_xmllite_errmsg(HRESULT hr)
{
    switch(hr) {
    case MX_E_INPUTEND:
        return "unexpected end of input";
    case MX_E_ENCODING:
        return "unrecognized encoding";
    case MX_E_ENCODINGSWITCH:
        return "unable to switch the encoding";
    case MX_E_ENCODINGSIGNATURE:
        return "unrecognized input signature";
    case WC_E_WHITESPACE:
        return "whitespace expected";
    case WC_E_SEMICOLON:
        return "semicolon expected";
    case WC_E_GREATERTHAN:
        return "'>' expected";
    case WC_E_QUOTE:
        return "quote expected";
    case WC_E_EQUAL:
        return "equal expected";
    case WC_E_LESSTHAN:
        return "well-formedness constraint: no '<' in attribute value";
    case WC_E_HEXDIGIT:
        return "hexadecimal digit expected";
    case WC_E_DIGIT:
        return "decimal digit expected";
    case WC_E_LEFTBRACKET:
        return "'[' expected";
    case WC_E_LEFTPAREN:
        return "'(' expected";
    case WC_E_XMLCHARACTER:
        return "illegal xml character";
    case WC_E_NAMECHARACTER:
        return "illegal name character";
    case WC_E_SYNTAX:
        return "incorrect document syntax";
    case WC_E_CDSECT:
        return "incorrect CDATA section syntax";
    case WC_E_COMMENT:
        return "incorrect comment syntax";
    case WC_E_CONDSECT:
        return "incorrect conditional section syntax";
    case WC_E_DECLATTLIST:
        return "incorrect ATTLIST declaration syntax";
    case WC_E_DECLDOCTYPE:
        return "incorrect DOCTYPE declaration syntax";
    case WC_E_DECLELEMENT:
        return "incorrect ELEMENT declaration syntax";
    case WC_E_DECLENTITY:
        return "incorrect ENTITY declaration syntax";
    case WC_E_DECLNOTATION:
        return "incorrect NOTATION declaration syntax";
    case WC_E_NDATA:
        return "NDATA expected";
    case WC_E_PUBLIC:
        return "PUBLIC expected";
    case WC_E_SYSTEM:
        return "SYSTEM expected";
    case WC_E_NAME:
        return "name expected";
    case WC_E_ROOTELEMENT:
        return "one root element";
    case WC_E_ELEMENTMATCH:
        return "well-formedness constraint: element type match";
    case WC_E_UNIQUEATTRIBUTE:
        return "well-formedness constraint: unique attribute spec";
    case WC_E_TEXTXMLDECL:
        return "text/xmldecl not at the beginning of input";
    case WC_E_LEADINGXML:
        return "leading \"xml\"";
    case WC_E_TEXTDECL:
        return "incorrect text declaration syntax";
    case WC_E_XMLDECL:
        return "incorrect xml declaration syntax";
    case WC_E_ENCNAME:
        return "incorrect encoding name syntax";
    case WC_E_PUBLICID:
        return "incorrect public identifier syntax";
    case WC_E_PESINTERNALSUBSET:
        return "well-formedness constraint: pes in internal subset";
    case WC_E_PESBETWEENDECLS:
        return "well-formedness constraint: pes between declarations";
    case WC_E_NORECURSION:
        return "well-formedness constraint: no recursion";
    case WC_E_ENTITYCONTENT:
        return "entity content not well formed";
    case WC_E_UNDECLAREDENTITY:
        return "well-formedness constraint: undeclared entity";
    case WC_E_PARSEDENTITY:
        return "well-formedness constraint: parsed entity";
    case WC_E_NOEXTERNALENTITYREF:
        return "well-formedness constraint: no external entity references";
    case WC_E_PI:
        return "incorrect processing instruction syntax";
    case WC_E_SYSTEMID:
        return "incorrect system identifier syntax";
    case WC_E_QUESTIONMARK:
        return "'?' expected";
    case WC_E_CDSECTEND:
        return "no ']]>' in element content";
    case WC_E_MOREDATA:
        return "not all chunks of value have been read";
    case WC_E_DTDPROHIBITED:
        return "DTD was found but is prohibited";
    case WC_E_INVALIDXMLSPACE:
        return "xml:space attribute with invalid value";
    case NC_E_QNAMECHARACTER:
        return "illegal qualified name character";
    case NC_E_QNAMECOLON:
        return "multiple colons in qualified name";
    case NC_E_NAMECOLON:
        return "colon in name";
    case NC_E_DECLAREDPREFIX:
        return "declared prefix";
    case NC_E_UNDECLAREDPREFIX:
        return "undeclared prefix";
    case NC_E_EMPTYURI:
        return "non default namespace with empty uri";
    case NC_E_XMLPREFIXRESERVED:
        return "\"xml\" prefix is reserved and must have the "
               "http://www.w3.org/XML/1998/namespace URI";
    case NC_E_XMLNSPREFIXRESERVED:
        return "\"xmlns\" prefix is reserved for use by XML";
    case NC_E_XMLURIRESERVED:
        return "xml namespace URI (http://www.w3.org/XML/1998/namespace) must "
               "be assigned only to prefix \"xml\"";
    case NC_E_XMLNSURIRESERVED:
        return "xmlns namespace URI (http://www.w3.org/2000/xmlns/) is "
               "reserved and must not be used";
    case SC_E_MAXELEMENTDEPTH:
        return "element depth exceeds limit";
    case SC_E_MAXENTITYEXPANSION:
        return "entity expansion exceeds limit";
    case XML_E_INVALID_DECIMAL:
        return "character in character entity is not a decimal digit "
               "as was expected.";
    case XML_E_INVALID_HEXIDECIMAL:
        return "character in character entity is not a hexadecimal "
               "digit as was expected.";
    case XML_E_INVALID_UNICODE:
        return "character entity has invalid Unicode value.";
    default:
        return "";
    }
}

/* Records a failed XmlLite HRESULT (code and message) on the parser and
 * returns APR_EGENERAL for the caller to propagate.
 */
static apr_status_t handle_xmllite_err(apr_xml_parser *parser, HRESULT hr)
{
    parser->xp_err = hr;
    parser->xp_msg = get_xmllite_errmsg(hr);

    /* this misnomer is used as a test for (any) parser error. */
    parser->error = APR_XML_ERROR_EXPAT;

    return APR_EGENERAL;
}

static apr_status_t cdata_state(apr_xml_parser *parser,
                                apr_pool_t *scratch_pool);

/* State: advance the reader to the next node and dispatch on its type.
 *
 *  - Element:    invokes start_func with the qualified name and a
 *                NULL-terminated array of name/value pairs; also invokes
 *                end_func immediately for an empty element.
 *  - EndElement: invokes end_func with the qualified name.
 *  - CDATA/Text: switches to cdata_state to stream the value.
 *  - Whitespace: switches to cdata_state only when depth > 0.
 *  - None:       returns APR_EOF.
 *
 * Everything handed to the callbacks is allocated from 'scratch_pool' and
 * is therefore only valid for the duration of the callback.
 *
 * @return APR_SUCCESS to keep stepping, APR_EAGAIN when the reader needs
 *         more input, APR_EOF at end of document, or an error status.
 */
static apr_status_t read_state(apr_xml_parser *parser,
                               apr_pool_t *scratch_pool)
{
    HRESULT hr;
    XmlNodeType node_type;
    apr_status_t status;

    hr = IXmlReader_Read(parser->xp->xml_reader, &node_type);
    if (hr == E_PENDING) {
        return APR_EAGAIN;
    }
    else if (FAILED(hr)) {
        return handle_xmllite_err(parser, hr);
    }

    if (node_type == XmlNodeType_Element) {
        static const char *no_attrs[] = { NULL, NULL };
        LPCWSTR wname;
        UINT wname_len;
        UINT attr_count;
        const char **attrs;
        const char *elem_name;

        hr = IXmlReader_GetQualifiedName(parser->xp->xml_reader,
                                         &wname, &wname_len);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        status = wstr2utf(&elem_name, NULL, wname, wname_len, scratch_pool);
        if (status) {
            return status;
        }

        hr = IXmlReader_GetAttributeCount(parser->xp->xml_reader,
                                          &attr_count);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        if (attr_count > 0) {
            UINT i;

            /* The array lives exactly as long as the strings it points
             * to, so take it from the scratch pool as well instead of
             * growing the parser pool for every element.
             */
            attrs = apr_palloc(scratch_pool,
                               sizeof(char*) * (attr_count + 1) * 2);

            hr = IXmlReader_MoveToFirstAttribute(parser->xp->xml_reader);
            if (FAILED(hr)) {
                return handle_xmllite_err(parser, hr);
            }

            for (i = 0; i < attr_count; i++) {
                hr = IXmlReader_GetQualifiedName(parser->xp->xml_reader,
                                                 &wname, &wname_len);
                if (FAILED(hr)) {
                    return handle_xmllite_err(parser, hr);
                }

                status = wstr2utf(&attrs[i * 2], NULL, wname, wname_len,
                                  scratch_pool);
                if (status) {
                    return status;
                }

                hr = IXmlReader_GetValue(parser->xp->xml_reader,
                                         &wname, &wname_len);
                if (FAILED(hr)) {
                    return handle_xmllite_err(parser, hr);
                }

                status = wstr2utf(&attrs[i * 2 + 1], NULL, wname, wname_len,
                                  scratch_pool);
                if (status) {
                    return status;
                }

                hr = IXmlReader_MoveToNextAttribute(parser->xp->xml_reader);
                if (FAILED(hr)) {
                    return handle_xmllite_err(parser, hr);
                }
            }

            /* Terminate the name/value list. */
            attrs[i * 2] = NULL;
            attrs[i * 2 + 1] = NULL;

            /* Return to the owning element so that IsEmptyElement()
             * below is evaluated on the element, not on an attribute.
             */
            hr = IXmlReader_MoveToElement(parser->xp->xml_reader);
            if (FAILED(hr)) {
                return handle_xmllite_err(parser, hr);
            }
        }
        else {
            attrs = no_attrs;
        }

        parser->xp->start_func(parser, elem_name, attrs);

        if (IXmlReader_IsEmptyElement(parser->xp->xml_reader)) {
            parser->xp->end_func(parser, elem_name);
        }
    }
    else if (node_type == XmlNodeType_EndElement) {
        LPCWSTR wname;
        UINT wname_len;
        const char *elem_name;

        hr = IXmlReader_GetQualifiedName(parser->xp->xml_reader,
                                         &wname, &wname_len);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        status = wstr2utf(&elem_name, NULL, wname, wname_len, scratch_pool);
        if (status) {
            return status;
        }

        parser->xp->end_func(parser, elem_name);
    }
    else if (node_type == XmlNodeType_CDATA ||
             node_type == XmlNodeType_Text) {
        parser->xp->current_state = cdata_state;
    }
    else if (node_type == XmlNodeType_Whitespace) {
        UINT depth;

        hr = IXmlReader_GetDepth(parser->xp->xml_reader, &depth);
        if (FAILED(hr)) {
            return handle_xmllite_err(parser, hr);
        }

        /* Report whitespaces as cdata (the same as Expat does), but
           ignore them when depth == 0. */
        if (depth > 0) {
            parser->xp->current_state = cdata_state;
        }
    }
    else if (node_type == XmlNodeType_None) {
        return APR_EOF;
    }

    return APR_SUCCESS;
}

/* State: stream the value of the current text/CDATA/whitespace node to
 * cdata_func in chunks of at most 4000 UTF-16 code units, converting each
 * chunk to UTF-8.  Switches back to read_state once ReadValueChunk()
 * reports S_FALSE.
 *
 * @return APR_SUCCESS to keep stepping, APR_EAGAIN when the reader needs
 *         more input, or an error status.
 */
static apr_status_t cdata_state(apr_xml_parser *parser,
                                apr_pool_t *scratch_pool)
{
    HRESULT hr;
    apr_status_t status;
    WCHAR buf[4000];
    UINT read_count;

    hr = IXmlReader_ReadValueChunk(parser->xp->xml_reader, buf,
                                   sizeof(buf) / sizeof(buf[0]),
                                   &read_count);
    if (hr == E_PENDING) {
        return APR_EAGAIN;
    }
    else if (FAILED(hr)) {
        return handle_xmllite_err(parser, hr);
    }

    if (read_count > 0) {
        const char *cdata;
        apr_size_t cdata_len;

        /* NOTE(review): if a chunk can end between the two halves of a
         * surrogate pair, the conversion below would presumably fail for
         * that chunk -- confirm how ReadValueChunk() splits values.
         */
        status = wstr2utf(&cdata, &cdata_len, buf, read_count, scratch_pool);
        if (status) {
            return status;
        }

        parser->xp->cdata_func(parser, cdata, (int) cdata_len);
    }

    if (hr == S_FALSE) {
        parser->xp->current_state = read_state;
    }

    return APR_SUCCESS;
}

/* Feeds one chunk of input to the reader and runs the state machine until
 * it stops.
 *
 * The chunk is not copied: the stream points straight at 'data', which
 * must stay valid for the duration of the call.  The iteration pool is
 * cleared before every step.  APR_EAGAIN (chunk exhausted) and APR_EOF
 * (end of document) are reported to the caller as APR_SUCCESS; any other
 * status is returned unchanged.
 */
static apr_status_t do_parse(apr_xml_parser *parser,
                             const char *data, apr_size_t len,
                             int is_final)
{
    apr_status_t status;
    apr_pool_t *iterpool = parser->xp->iterpool;

    parser->xp->input_stream->data = data;
    parser->xp->input_stream->remaining = len;
    parser->xp->input_stream->is_final = is_final;

    while (TRUE) {
        apr_pool_clear(iterpool);

        status = parser->xp->current_state(parser, iterpool);
        if (status != APR_SUCCESS) {
            break;
        }
    }

    if (status == APR_EAGAIN || status == APR_EOF) {
        status = APR_SUCCESS;
    }

    return status;
}

/* Entry points of this backend. */
static XMLParserImpl xml_parser_xmllite = {
    do_parse,
    cleanup_parser
};

/* Returns the entry-point table of the XmlLite backend. */
XMLParserImpl* apr_xml_get_parser_impl(void)
{
    return &xml_parser_xmllite;
}

/* "DAV:" spelled out as ASCII code points. */
static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };

/* Creates a parser backed by XmlLite.
 *
 * Allocates the parser, its document and the backend state from 'pool',
 * creates the input stream and the XmlLite reader, and connects the two.
 * Cleanups registered on 'pool' release the stream and the reader.
 *
 * @param pool       pool that owns the parser
 * @param start_func element-start callback
 *                   (void (*)(void *, const char *, const char **))
 * @param end_func   element-end callback (void (*)(void *, const char *))
 * @param cdata_func character-data callback
 *                   (void (*)(void *, const char *, int))
 * @return the new parser, or NULL if the iteration pool, the stream or
 *         the reader could not be set up.  When the stream allocation
 *         fails, the pool's abort function (if any) is invoked with
 *         APR_ENOMEM first.
 */
apr_xml_parser* apr_xml_parser_create_internal(apr_pool_t *pool,
                                               void *start_func,
                                               void *end_func,
                                               void *cdata_func)
{
    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
    HRESULT hr;

    parser->impl = apr_xml_get_parser_impl();

    parser->p = pool;
    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));

    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));

    /* ### is there a way to avoid hard-coding this? */
    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);

    parser->xp = apr_pcalloc(pool, sizeof(struct xmllite_parser_s));
    parser->xp->current_state = read_state;
    parser->xp->start_func = start_func;
    parser->xp->end_func = end_func;
    parser->xp->cdata_func = cdata_func;

    if (apr_pool_create(&parser->xp->iterpool, pool) != APR_SUCCESS) {
        return NULL;
    }

    parser->xp->input_stream = create_xml_stream(pool);
    if (parser->xp->input_stream == NULL) {
        /* A pool is allowed to have no abort function installed. */
        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);

        if (abort_fn) {
            abort_fn(APR_ENOMEM);
        }
        return NULL;
    }

    hr = CreateXmlReader(&IID_IXmlReader,
                         (void **) &parser->xp->xml_reader, NULL);
    if (FAILED(hr)) {
        return NULL;
    }

    apr_pool_cleanup_register(pool, parser, cleanup_parser,
                              apr_pool_cleanup_null);

    hr = IXmlReader_SetInput(
             parser->xp->xml_reader,
             (IUnknown*) &parser->xp->input_stream->sequental_stream);
    if (FAILED(hr)) {
        return NULL;
    }

    return parser;
}

#endif