summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Veillard <veillard@src.gnome.org> 2003-08-14 12:19:54 +0000
committer: Daniel Veillard <veillard@src.gnome.org> 2003-08-14 12:19:54 +0000
commit: ab1ae3a7685fb7891ef8f6540a0612170300b151 (patch)
tree: d5d0c5e6ac02b1c0a962e695605008740f465d51
parent: ccf996fca0d0a22d56b8fdd63551dd14ebeb360f (diff)
download: libxml2-ab1ae3a7685fb7891ef8f6540a0612170300b151.tar.gz
applied UTF-16 encoding handling patch provided by Mark Itzcovitz; more cleanup and fixes for UTF-16 when not having iconv support
* encoding.c: applied UTF-16 encoding handling patch provided by Mark Itzcovitz.
* encoding.c, parser.c: more cleanup and fixes for UTF-16 when not having iconv support.
Daniel
-rw-r--r-- ChangeLog 9
-rw-r--r-- encoding.c 22
-rw-r--r-- include/libxml/parserInternals.h 2
-rw-r--r-- parser.c 20
4 files changed, 34 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 059441fe..d2862c9a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Thu Aug 14 14:13:43 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+ * encoding.c: applied UTF-16 encoding handling patch provided by
+ Mark Itzcovitz
+ * encoding.c parser.c: more cleanup and fixes for UTF-16 when
+ not having iconv support.
+
Thu Aug 14 03:19:08 CEST 2003 Daniel Veillard <daniel@veillard.com>
* Makefile.am configure.in example/Makefile.am libxml.h nanoftp.c
@@ -8,7 +15,7 @@ Thu Aug 14 02:28:36 CEST 2003 Daniel Veillard <daniel@veillard.com>
* parser.c: fixed the serious CPU usage problem reported by
Grant Goodale
- * HTMLparser.c: applied patch from Oliver Kidman about a free
+ * HTMLparser.c: applied patch from Oliver Stoeneberg about a free
missing in htmlSAXParseDoc
Tue Aug 12 22:48:10 HKT 2003 William Brack <wbrack@mmm.com.hk>
diff --git a/encoding.c b/encoding.c
index e79a5f95..2d3ccd0f 100644
--- a/encoding.c
+++ b/encoding.c
@@ -875,6 +875,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
{
unsigned short* out = (unsigned short*) outb;
const unsigned char* processed = in;
+ const unsigned char *const instart = in;
unsigned short* outstart= out;
unsigned short* outend;
const unsigned char* inend= in+*inlen;
@@ -909,7 +910,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
else if (d < 0xC0) {
/* trailing byte in leading position */
*outlen = (out - outstart) * 2;
- *inlen = processed - in;
+ *inlen = processed - instart;
return(-2);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
@@ -917,7 +918,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
else {
/* no chance for this in UTF-16 */
*outlen = (out - outstart) * 2;
- *inlen = processed - in;
+ *inlen = processed - instart;
return(-2);
}
@@ -971,7 +972,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
processed = in;
}
*outlen = (out - outstart) * 2;
- *inlen = processed - in;
+ *inlen = processed - instart;
return(0);
}
@@ -1086,6 +1087,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
{
unsigned short* out = (unsigned short*) outb;
const unsigned char* processed = in;
+ const unsigned char *const instart = in;
unsigned short* outstart= out;
unsigned short* outend;
const unsigned char* inend= in+*inlen;
@@ -1120,7 +1122,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
else if (d < 0xC0) {
/* trailing byte in leading position */
*outlen = out - outstart;
- *inlen = processed - in;
+ *inlen = processed - instart;
return(-2);
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
@@ -1128,7 +1130,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
else {
/* no chance for this in UTF-16 */
*outlen = out - outstart;
- *inlen = processed - in;
+ *inlen = processed - instart;
return(-2);
}
@@ -1179,7 +1181,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
processed = in;
}
*outlen = (out - outstart) * 2;
- *inlen = processed - in;
+ *inlen = processed - instart;
return(0);
}
@@ -1962,6 +1964,14 @@ xmlFindCharEncodingHandler(const char *name) {
}
}
+ /*
+ * If nothing was found and it is "UTF-16" then use the little-endian
+ * version.
+ */
+ if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) ||
+ (xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16")))
+ return(xmlUTF16LEHandler);
+
return(NULL);
}
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 8507442b..aec4a823 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -295,7 +295,7 @@ void xmlParseElement (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
-xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
+const xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
diff --git a/parser.c b/parser.c
index cb7fce4f..82caf10b 100644
--- a/parser.c
+++ b/parser.c
@@ -7518,7 +7518,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
* Returns the encoding value or NULL
*/
-xmlChar *
+const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
xmlChar *encoding = NULL;
const xmlChar *q;
@@ -7582,23 +7582,21 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
if ((encoding != NULL) &&
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
- if (ctxt->input->encoding != NULL)
- xmlFree((xmlChar *) ctxt->input->encoding);
- ctxt->input->encoding = encoding;
- encoding = NULL;
+ if (ctxt->encoding != NULL)
+ xmlFree((xmlChar *) ctxt->encoding);
+ ctxt->encoding = encoding;
}
/*
* UTF-8 encoding is handled natively
*/
- if ((encoding != NULL) &&
+ else if ((encoding != NULL) &&
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
- if (ctxt->input->encoding != NULL)
- xmlFree((xmlChar *) ctxt->input->encoding);
- ctxt->input->encoding = encoding;
- encoding = NULL;
+ if (ctxt->encoding != NULL)
+ xmlFree((xmlChar *) ctxt->encoding);
+ ctxt->encoding = encoding;
}
- if (encoding != NULL) {
+ else if (encoding != NULL) {
xmlCharEncodingHandlerPtr handler;
if (ctxt->input->encoding != NULL)