diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-12-28 00:06:36 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-12-28 00:06:36 +0000 |
commit | 6eb5949dd99d174393465069c2fd0bab32deddcc (patch) | |
tree | d104f72ac2dcb0f592b1c8e06a5af6790bd4080c /gettext-tools/gnulib-lib/libxml/HTMLtree.c | |
parent | 482840e61f86ca321838a91e902c41d40c098bbb (diff) | |
download | gettext-tarball-6eb5949dd99d174393465069c2fd0bab32deddcc.tar.gz |
gettext-0.19.7gettext-0.19.7
Diffstat (limited to 'gettext-tools/gnulib-lib/libxml/HTMLtree.c')
-rw-r--r-- | gettext-tools/gnulib-lib/libxml/HTMLtree.c | 258 |
1 files changed, 163 insertions, 95 deletions
diff --git a/gettext-tools/gnulib-lib/libxml/HTMLtree.c b/gettext-tools/gnulib-lib/libxml/HTMLtree.c index c1e5a0a..2fd0c9c 100644 --- a/gettext-tools/gnulib-lib/libxml/HTMLtree.c +++ b/gettext-tools/gnulib-lib/libxml/HTMLtree.c @@ -30,16 +30,18 @@ #include <libxml/globals.h> #include <libxml/uri.h> +#include "buf.h" + /************************************************************************ * * - * Getting/Setting encoding meta tags * + * Getting/Setting encoding meta tags * * * ************************************************************************/ /** * htmlGetMetaEncoding: * @doc: the document - * + * * Encoding definition lookup in the Meta tags * * Returns the current encoding as flagged in the HTML source @@ -126,17 +128,17 @@ found_meta: found_content: encoding = xmlStrstr(content, BAD_CAST"charset="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET="); if (encoding != NULL) { encoding += 8; } else { encoding = xmlStrstr(content, BAD_CAST"charset ="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset ="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); if (encoding != NULL) encoding += 9; @@ -151,7 +153,7 @@ found_content: * htmlSetMetaEncoding: * @doc: the document * @encoding: the encoding string - * + * * Sets the current encoding in the Meta tags * NOTE: this will not change the document content encoding, just * the META flag associated. @@ -160,14 +162,19 @@ found_content: */ int htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { - htmlNodePtr cur, meta; - const xmlChar *content; + htmlNodePtr cur, meta = NULL, head = NULL; + const xmlChar *content = NULL; char newcontent[100]; + newcontent[0] = 0; if (doc == NULL) return(-1); + /* html isn't a real encoding it's just libxml2 way to get entities */ + if (!xmlStrcasecmp(encoding, BAD_CAST "html")) + return(-1); + if (encoding != NULL) { snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", (char *)encoding); @@ -201,39 +208,24 @@ htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) break; - if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) + if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { + head = cur->parent; goto found_meta; + } } cur = cur->next; } if (cur == NULL) return(-1); found_head: - if (cur->children == NULL) { - if (encoding == NULL) - return(0); - meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); - xmlAddChild(cur, meta); - xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); - xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); - return(0); - } + head = cur; + if (cur->children == NULL) + goto create; cur = cur->children; found_meta: - if (encoding != NULL) { - /* - * Create a new Meta element with the right attributes - */ - - meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); - xmlAddPrevSibling(cur, meta); - xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); - xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); - } - /* - * Search and destroy all the remaining the meta elements carrying + * Search and update all the remaining the meta elements carrying * encoding informations */ while (cur != NULL) { @@ -253,11 +245,11 @@ found_meta: if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) http = 1; - else + else { - if ((value != NULL) && - (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) - content = value; + if ((value != NULL) && + (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) + content = value; } if ((http != 0) && (content != NULL)) break; @@ -266,16 +258,41 @@ found_meta: } if ((http != 0) && (content != NULL)) { meta = cur; - cur = cur->next; - xmlUnlinkNode(meta); - xmlFreeNode(meta); - continue; + break; } } } cur = cur->next; } +create: + if (meta == NULL) { + if ((encoding != NULL) && (head != NULL)) { + /* + * Create a new Meta element with the right attributes + */ + + meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); + if (head->children == NULL) + xmlAddChild(head, meta); + else + xmlAddPrevSibling(head->children, meta); + xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); + xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); + } + } else { + /* remove the meta tag if NULL is passed */ + if (encoding == NULL) { + xmlUnlinkNode(meta); + xmlFreeNode(meta); + } + /* change the document only if there is a real encoding change */ + else if (xmlStrcasestr(content, encoding) == NULL) { + xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); + } + } + + return(0); } @@ -299,7 +316,7 @@ static const char* htmlBooleanAttrs[] = { * @name: the name of the attribute to check * * Determine if a given attribute is a boolean attribute. - * + * * returns: false if the attribute is not boolean, true otherwise. */ int @@ -316,9 +333,14 @@ htmlIsBooleanAttr(const xmlChar *name) } #ifdef LIBXML_OUTPUT_ENABLED +/* + * private routine exported from xmlIO.c + */ +xmlOutputBufferPtr +xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); /************************************************************************ * * - * Output error handlers * + * Output error handlers * * * ************************************************************************/ /** @@ -367,17 +389,13 @@ htmlSaveErr(int code, xmlNodePtr node, const char *extra) /************************************************************************ * * - * Dumping HTML tree content to a simple buffer * + * Dumping HTML tree content to a simple buffer * * * ************************************************************************/ -static int -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, - int format); - /** - * htmlNodeDumpFormat: - * @buf: the HTML buffer output + * htmlBufNodeDumpFormat: + * @buf: the xmlBufPtr output * @doc: the document * @cur: the current node * @format: should formatting spaces been added @@ -386,10 +404,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, * * Returns the number of byte written or -1 in case of error */ -static int -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, +static size_t +htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) { - unsigned int use; + size_t use; int ret; xmlOutputBufferPtr outbuf; @@ -412,10 +430,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, outbuf->context = NULL; outbuf->written = 0; - use = buf->use; + use = xmlBufUse(buf); htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); xmlFree(outbuf); - ret = buf->use - use; + ret = xmlBufUse(buf) - use; return (ret); } @@ -432,9 +450,24 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, */ int htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufPtr buffer; + size_t ret; + + if ((buf == NULL) || (cur == NULL)) + return(-1); + xmlInitParser(); + buffer = xmlBufFromBuffer(buf); + if (buffer == NULL) + return(-1); + + ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); - return(htmlNodeDumpFormat(buf, doc, cur, 1)); + xmlBufBackToBuffer(buffer); + + if (ret > INT_MAX) + return(-1); + return((int) ret); } /** @@ -467,7 +500,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, if (enc != XML_CHAR_ENCODING_UTF8) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) - return(-1); + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); } } @@ -479,7 +512,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFile(out, handler); @@ -548,11 +581,9 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { } handler = xmlFindCharEncodingHandler(encoding); - if (handler == NULL) { - *mem = NULL; - *size = 0; - return; - } + if (handler == NULL) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); + } else { handler = xmlFindCharEncodingHandler(encoding); } @@ -566,22 +597,22 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - buf = xmlAllocOutputBuffer(handler); + buf = xmlAllocOutputBufferInternal(handler); if (buf == NULL) { *mem = NULL; *size = 0; return; } - htmlDocContentDumpFormatOutput(buf, cur, NULL, format); + htmlDocContentDumpFormatOutput(buf, cur, NULL, format); xmlOutputBufferFlush(buf); if (buf->conv != NULL) { - *size = buf->conv->use; - *mem = xmlStrndup(buf->conv->content, *size); + *size = xmlBufUse(buf->conv); + *mem = xmlStrndup(xmlBufContent(buf->conv), *size); } else { - *size = buf->buffer->use; - *mem = xmlStrndup(buf->buffer->content, *size); + *size = xmlBufUse(buf->buffer); + *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); } (void)xmlOutputBufferClose(buf); } @@ -603,7 +634,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { /************************************************************************ * * - * Dumping HTML tree content to an I/O output buffer * + * Dumping HTML tree content to an I/O output buffer * * * ************************************************************************/ @@ -614,7 +645,7 @@ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); * @buf: the HTML buffer output * @doc: the document * @encoding: the encoding string - * + * * TODO: check whether encoding is needed * * Dump the HTML document DTD, if any. @@ -632,14 +663,15 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->ExternalID != NULL) { xmlOutputBufferWriteString(buf, " PUBLIC "); - xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); + xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " "); - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); - } - } else if (cur->SystemID != NULL) { + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); + } + } else if (cur->SystemID != NULL && + xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) { xmlOutputBufferWriteString(buf, " SYSTEM "); - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); } xmlOutputBufferWriteString(buf, ">\n"); } @@ -659,9 +691,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, xmlChar *value; /* - * TODO: The html output method should not escape a & character - * occurring in an attribute value immediately followed by - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * The html output method should not escape a & character + * occurring in an attribute value immediately followed by + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * This is implemented in xmlEncodeEntitiesReentrant */ if (cur == NULL) { @@ -684,20 +717,51 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { - xmlChar *escaped; xmlChar *tmp = value; + /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ + xmlBufCCat(buf->buffer, "\""); while (IS_BLANK_CH(*tmp)) tmp++; - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); - if (escaped != NULL) { - xmlBufferWriteQuotedString(buf->buffer, escaped); - xmlFree(escaped); - } else { - xmlBufferWriteQuotedString(buf->buffer, value); + /* URI Escape everything, except server side includes. */ + for ( ; ; ) { + xmlChar *escaped; + xmlChar endChar; + xmlChar *end = NULL; + xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); + if (start != NULL) { + end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->"); + if (end != NULL) { + *start = '\0'; + } + } + + /* Escape the whole string, or until start (set to '\0'). */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); + if (escaped != NULL) { + xmlBufCat(buf->buffer, escaped); + xmlFree(escaped); + } else { + xmlBufCat(buf->buffer, tmp); + } + + if (end == NULL) { /* Everything has been written. */ + break; + } + + /* Do not escape anything within server side includes. */ + *start = '<'; /* Restore the first character of "<!--". */ + end += 3; /* strlen("-->") */ + endChar = *end; + *end = '\0'; + xmlBufCat(buf->buffer, start); + *end = endChar; + tmp = end; } + + xmlBufCCat(buf->buffer, "\""); } else { - xmlBufferWriteQuotedString(buf->buffer, value); + xmlBufWriteQuotedString(buf->buffer, value); } xmlFree(value); } else { @@ -780,6 +844,10 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); return; } + if (cur->type == XML_ATTRIBUTE_NODE) { + htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); + return; + } if (cur->type == HTML_TEXT_NODE) { if (cur->content != NULL) { if (((cur->name == (const xmlChar *)xmlStringText) || @@ -1043,7 +1111,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) - return(-1); + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); } else { handler = xmlFindCharEncodingHandler(encoding); } @@ -1083,7 +1151,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { if ((cur == NULL) || (filename == NULL)) return(-1); - + xmlInitParser(); encoding = (const char *) htmlGetMetaEncoding(cur); @@ -1102,7 +1170,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) - return(-1); + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); } } @@ -1114,7 +1182,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); @@ -1134,7 +1202,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { * @encoding: the document encoding * * Dump an HTML document to a file using a given encoding. - * + * * returns: the number of byte written or -1 in case of failure. */ int @@ -1146,7 +1214,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, if ((cur == NULL) || (filename == NULL)) return(-1); - + xmlInitParser(); if (encoding != NULL) { @@ -1163,9 +1231,9 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) - return(-1); - htmlSetMetaEncoding(cur, (const xmlChar *) encoding); + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); } + htmlSetMetaEncoding(cur, (const xmlChar *) encoding); } else { htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); } @@ -1178,7 +1246,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, 0); @@ -1198,7 +1266,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, * * Dump an HTML document to a file using a given encoding * and formatting returns/spaces are added. - * + * * returns: the number of byte written or -1 in case of failure. */ int |