diff options
author | Daniel Veillard <veillard@src.gnome.org> | 2003-08-14 12:19:54 +0000 |
---|---|---|
committer | Daniel Veillard <veillard@src.gnome.org> | 2003-08-14 12:19:54 +0000 |
commit | ab1ae3a7685fb7891ef8f6540a0612170300b151 (patch) | |
tree | d5d0c5e6ac02b1c0a962e695605008740f465d51 | |
parent | ccf996fca0d0a22d56b8fdd63551dd14ebeb360f (diff) | |
download | libxml2-ab1ae3a7685fb7891ef8f6540a0612170300b151.tar.gz |
applied UTF-16 encoding handling patch provided by Mark Itzcovitz; more cleanup and fixes for UTF-16 when not having iconv support
* encoding.c: applied UTF-16 encoding handling patch provided by
Mark Itzcovitz
* encoding.c parser.c: more cleanup and fixes for UTF-16 when
not having iconv support.
Daniel
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | encoding.c | 22 | ||||
-rw-r--r-- | include/libxml/parserInternals.h | 2 | ||||
-rw-r--r-- | parser.c | 20 |
4 files changed, 34 insertions, 19 deletions
@@ -1,3 +1,10 @@ +Thu Aug 14 14:13:43 CEST 2003 Daniel Veillard <daniel@veillard.com> + + * encoding.c: applied UTF-16 encoding handling patch provided by + Mark Itzcovitz + * encoding.c parser.c: more cleanup and fixes for UTF-16 when + not having iconv support. + Thu Aug 14 03:19:08 CEST 2003 Daniel Veillard <daniel@veillard.com> * Makefile.am configure.in example/Makefile.am libxml.h nanoftp.c @@ -8,7 +15,7 @@ Thu Aug 14 02:28:36 CEST 2003 Daniel Veillard <daniel@veillard.com> * parser.c: fixed the serious CPU usage problem reported by Grant Goodale - * HTMLparser.c: applied patch from Oliver Kidman about a free + * HTMLparser.c: applied patch from Oliver Stoeneberg about a free missing in htmlSAXParseDoc Tue Aug 12 22:48:10 HKT 2003 William Brack <wbrack@mmm.com.hk> @@ -875,6 +875,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, { unsigned short* out = (unsigned short*) outb; const unsigned char* processed = in; + const unsigned char *const instart = in; unsigned short* outstart= out; unsigned short* outend; const unsigned char* inend= in+*inlen; @@ -909,7 +910,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, else if (d < 0xC0) { /* trailing byte in leading position */ *outlen = (out - outstart) * 2; - *inlen = processed - in; + *inlen = processed - instart; return(-2); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } @@ -917,7 +918,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, else { /* no chance for this in UTF-16 */ *outlen = (out - outstart) * 2; - *inlen = processed - in; + *inlen = processed - instart; return(-2); } @@ -971,7 +972,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, processed = in; } *outlen = (out - outstart) * 2; - *inlen = processed - in; + *inlen = processed - instart; return(0); } @@ -1086,6 +1087,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, { unsigned short* out = (unsigned short*) outb; const unsigned char* processed = in; + const unsigned char *const instart 
= in; unsigned short* outstart= out; unsigned short* outend; const unsigned char* inend= in+*inlen; @@ -1120,7 +1122,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, else if (d < 0xC0) { /* trailing byte in leading position */ *outlen = out - outstart; - *inlen = processed - in; + *inlen = processed - instart; return(-2); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } @@ -1128,7 +1130,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, else { /* no chance for this in UTF-16 */ *outlen = out - outstart; - *inlen = processed - in; + *inlen = processed - instart; return(-2); } @@ -1179,7 +1181,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, processed = in; } *outlen = (out - outstart) * 2; - *inlen = processed - in; + *inlen = processed - instart; return(0); } @@ -1962,6 +1964,14 @@ xmlFindCharEncodingHandler(const char *name) { } } + /* + * If nothing was found and it is "UTF-16" then use the Little indian + * version. + */ + if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) || + (xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16"))) + return(xmlUTF16LEHandler); + return(NULL); } diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 8507442b..aec4a823 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -295,7 +295,7 @@ void xmlParseElement (xmlParserCtxtPtr ctxt); xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt); xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt); xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt); -xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); +const xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); int xmlParseSDDecl (xmlParserCtxtPtr ctxt); void xmlParseXMLDecl (xmlParserCtxtPtr ctxt); void xmlParseTextDecl (xmlParserCtxtPtr ctxt); @@ -7518,7 +7518,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { * Returns the encoding value or NULL */ -xmlChar * +const xmlChar * xmlParseEncodingDecl(xmlParserCtxtPtr 
ctxt) { xmlChar *encoding = NULL; const xmlChar *q; @@ -7582,23 +7582,21 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { if ((encoding != NULL) && ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { - if (ctxt->input->encoding != NULL) - xmlFree((xmlChar *) ctxt->input->encoding); - ctxt->input->encoding = encoding; - encoding = NULL; + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = encoding; } /* * UTF-8 encoding is handled natively */ - if ((encoding != NULL) && + else if ((encoding != NULL) && ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { - if (ctxt->input->encoding != NULL) - xmlFree((xmlChar *) ctxt->input->encoding); - ctxt->input->encoding = encoding; - encoding = NULL; + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = encoding; } - if (encoding != NULL) { + else if (encoding != NULL) { xmlCharEncodingHandlerPtr handler; if (ctxt->input->encoding != NULL) |