diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-12-28 00:06:36 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2015-12-28 00:06:36 +0000 |
commit | 6eb5949dd99d174393465069c2fd0bab32deddcc (patch) | |
tree | d104f72ac2dcb0f592b1c8e06a5af6790bd4080c /gnulib-local/lib/libxml/encoding.c | |
parent | 482840e61f86ca321838a91e902c41d40c098bbb (diff) | |
download | gettext-tarball-6eb5949dd99d174393465069c2fd0bab32deddcc.tar.gz |
gettext-0.19.7gettext-0.19.7
Diffstat (limited to 'gnulib-local/lib/libxml/encoding.c')
-rw-r--r-- | gnulib-local/lib/libxml/encoding.c | 1500 |
1 files changed, 1145 insertions, 355 deletions
diff --git a/gnulib-local/lib/libxml/encoding.c b/gnulib-local/lib/libxml/encoding.c index ee33df1..574e1ae 100644 --- a/gnulib-local/lib/libxml/encoding.c +++ b/gnulib-local/lib/libxml/encoding.c @@ -1,7 +1,7 @@ /* * encoding.c : implements the encoding conversion functions needed for XML * - * Related specs: + * Related specs: * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau * [ISO-10646] UTF-8 and UTF-16 in Annexes @@ -24,6 +24,7 @@ #include "libxml.h" #include <string.h> +#include <limits.h> #ifdef HAVE_CTYPE_H #include <ctype.h> @@ -44,6 +45,9 @@ #include <libxml/globals.h> #include <libxml/xmlerror.h> +#include "buf.h" +#include "enc.h" + static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; @@ -58,7 +62,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; static int xmlCharEncodingAliasesNb = 0; static int xmlCharEncodingAliasesMax = 0; -#ifdef LIBXML_ICONV_ENABLED +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) #if 0 #define DEBUG_ENCODING /* Define this to get encoding traces */ #endif @@ -97,6 +101,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) NULL, 0, val, NULL, NULL, 0, 0, msg, val); } +#ifdef LIBXML_ICU_ENABLED +static uconv_t* +openIcuConverter(const char* name, int toUnicode) +{ + UErrorCode status = U_ZERO_ERROR; + uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); + if (conv == NULL) + return NULL; + + conv->uconv = ucnv_open(name, &status); + if (U_FAILURE(status)) + goto error; + + status = U_ZERO_ERROR; + if (toUnicode) { + ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, + NULL, NULL, NULL, &status); + } + else { + ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, + NULL, NULL, NULL, &status); + } + if (U_FAILURE(status)) + goto error; + + status = U_ZERO_ERROR; + conv->utf8 = ucnv_open("UTF-8", &status); + if (U_SUCCESS(status)) + return conv; + +error: + if (conv->uconv) + ucnv_close(conv->uconv); + xmlFree(conv); + return NULL; +} + +static void +closeIcuConverter(uconv_t *conv) +{ + if (conv != NULL) { + ucnv_close(conv->uconv); + ucnv_close(conv->utf8); + xmlFree(conv); + } +} +#endif /* LIBXML_ICU_ENABLED */ + /************************************************************************ * * * Conversions To/From UTF8 encoding * @@ -135,12 +187,12 @@ asciiToUTF8(unsigned char* out, int *outlen, break; if (c < 0x80) { *out++ = c; - } else { + } else { *outlen = out - outstart; *inlen = processed - base; return(-1); } - + processed = (const unsigned char*) in; } *outlen = out - outstart; @@ -206,7 +258,7 @@ UTF8Toascii(unsigned char* out, int *outlen, if (inend - in < trailing) { break; - } + } for ( ; trailing; trailing--) { if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) @@ -263,19 +315,19 @@ isolat1ToUTF8(unsigned char* out, int *outlen, outend = out + *outlen; inend = in + (*inlen); instop = inend; - - while (in < inend && out < outend - 1) { - if (*in >= 0x80) { + + while ((in < inend) && (out < outend - 1)) { + if (*in >= 0x80) { *out++ = (((*in) >> 6) & 0x1F) | 0xC0; - *out++ = ((*in) & 0x3F) | 0x80; + *out++ = ((*in) & 0x3F) | 0x80; ++in; } - if (instop - in > outend - out) instop = in + (outend - out); - while (in < instop && *in < 0x80) { + if ((instop - in) > (outend - out)) instop = in + (outend - out); + while ((in < instop) && (*in < 0x80)) { *out++ = *in++; } - } - if (in < inend && out < outend && *in < 0x80) { + } + if ((in < inend) && (out < outend) && (*in < 0x80)) { *out++ = *in++; } *outlen = out - outstart; @@ -379,7 +431,7 @@ UTF8Toisolat1(unsigned char* out, int *outlen, if (inend - in < trailing) { break; - } + } for ( ; trailing; trailing--) { if (in >= inend) @@ -487,7 +539,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } - + for ( ; bits >= 0; bits-= 6) { if (out >= outend) break; @@ -512,7 +564,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, * block of chars out. * * Returns the number of bytes written, or -1 if lack of space, or -2 - * if the transcoding failed. + * if the transcoding failed. */ static int UTF8ToUTF16LE(unsigned char* outb, int *outlen, @@ -558,7 +610,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, if (inend - in < trailing) { break; - } + } for ( ; trailing; trailing--) { if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) @@ -621,7 +673,7 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen, * block of chars out. * * Returns the number of bytes written, or -1 if lack of space, or -2 - * if the transcoding failed. + * if the transcoding failed. */ static int UTF8ToUTF16(unsigned char* outb, int *outlen, @@ -693,7 +745,7 @@ UTF16BEToUTF8(unsigned char* out, int *outlen, in++; } else { c= *in++; - } + } if ((c & 0xFC00) == 0xD800) { /* surrogates */ if (in >= inend) { /* (in > inend) shouldn't happens */ *outlen = out - outstart; @@ -723,15 +775,15 @@ UTF16BEToUTF8(unsigned char* out, int *outlen, } /* assertion: c is a single UTF-4 value */ - if (out >= outend) + if (out >= outend) break; if (c < 0x80) { *out++= c; bits= -6; } else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } - + for ( ; bits >= 0; bits-= 6) { - if (out >= outend) + if (out >= outend) break; *out++= ((c >> bits) & 0x3F) | 0x80; } @@ -754,7 +806,7 @@ UTF16BEToUTF8(unsigned char* out, int *outlen, * block of chars out. * * Returns the number of byte written, or -1 by lack of space, or -2 - * if the transcoding failed. + * if the transcoding failed. */ static int UTF8ToUTF16BE(unsigned char* outb, int *outlen, @@ -800,7 +852,7 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, if (inend - in < trailing) { break; - } + } for ( ; trailing; trailing--) { if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; @@ -864,13 +916,13 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen, * * Guess the encoding of the entity using the first bytes of the entity content * according to the non-normative appendix F of the XML-1.0 recommendation. - * + * * Returns one of the XML_CHAR_ENCODING_... values. */ xmlCharEncoding xmlDetectCharEncoding(const unsigned char* in, int len) { - if (in == NULL) + if (in == NULL) return(XML_CHAR_ENCODING_NONE); if (len >= 4) { if ((in[0] == 0x00) && (in[1] == 0x00) && @@ -951,7 +1003,7 @@ xmlCleanupEncodingAliases(void) { * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) * * Lookup an encoding name for the given alias. - * + * * Returns NULL if not found, otherwise the original name */ const char * @@ -989,7 +1041,7 @@ xmlGetEncodingAlias(const char *alias) { * * Registers an alias @alias for an encoding named @name. Existing alias * will be overwritten. - * + * * Returns 0 in case of success, -1 in case of error */ int @@ -1009,13 +1061,13 @@ xmlAddEncodingAlias(const char *name, const char *alias) { if (xmlCharEncodingAliases == NULL) { xmlCharEncodingAliasesNb = 0; xmlCharEncodingAliasesMax = 20; - xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); if (xmlCharEncodingAliases == NULL) return(-1); } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { xmlCharEncodingAliasesMax *= 2; - xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) xmlRealloc(xmlCharEncodingAliases, xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); } @@ -1046,7 +1098,7 @@ xmlAddEncodingAlias(const char *name, const char *alias) { * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) * * Unregisters an encoding alias @alias - * + * * Returns 0 in case of success, -1 in case of error */ int @@ -1081,7 +1133,7 @@ xmlDelEncodingAlias(const char *alias) { * Compare the string to the encoding schemes already known. Note * that the comparison is case insensitive accordingly to the section * [XML] 4.3.3 Character Encoding in Entities. - * + * * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE * if not recognized. */ @@ -1118,7 +1170,7 @@ xmlParseCharEncoding(const char* name) */ if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); - + if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); @@ -1131,7 +1183,7 @@ xmlParseCharEncoding(const char* name) if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); - + if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); @@ -1254,7 +1306,7 @@ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). */ xmlCharEncodingHandlerPtr -xmlNewCharEncodingHandler(const char *name, +xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output) { xmlCharEncodingHandlerPtr handler; @@ -1299,6 +1351,7 @@ xmlNewCharEncodingHandler(const char *name, xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); return(NULL); } + memset(handler, 0, sizeof(xmlCharEncodingHandler)); handler->input = input; handler->output = output; handler->name = up; @@ -1306,7 +1359,11 @@ xmlNewCharEncodingHandler(const char *name, #ifdef LIBXML_ICONV_ENABLED handler->iconv_in = NULL; handler->iconv_out = NULL; -#endif /* LIBXML_ICONV_ENABLED */ +#endif +#ifdef LIBXML_ICU_ENABLED + handler->uconv_in = NULL; + handler->uconv_out = NULL; +#endif /* * registers and returns the handler. @@ -1330,7 +1387,7 @@ xmlNewCharEncodingHandler(const char *name, void xmlInitCharEncodingHandlers(void) { unsigned short int tst = 0x1234; - unsigned char *ptr = (unsigned char *) &tst; + unsigned char *ptr = (unsigned char *) &tst; if (handlers != NULL) return; @@ -1350,9 +1407,9 @@ xmlInitCharEncodingHandlers(void) { } xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); #ifdef LIBXML_OUTPUT_ENABLED - xmlUTF16LEHandler = + xmlUTF16LEHandler = xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); - xmlUTF16BEHandler = + xmlUTF16BEHandler = xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); @@ -1362,16 +1419,16 @@ xmlInitCharEncodingHandlers(void) { xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); #endif #else - xmlUTF16LEHandler = + xmlUTF16LEHandler = xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); - xmlUTF16BEHandler = + xmlUTF16BEHandler = xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); #endif /* LIBXML_OUTPUT_ENABLED */ -#ifndef LIBXML_ICONV_ENABLED +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) #ifdef LIBXML_ISO8859X_ENABLED xmlRegisterCharEncodingHandlersISO8859x (); #endif @@ -1414,7 +1471,7 @@ xmlCleanupCharEncodingHandlers(void) { void xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { if (handlers == NULL) xmlInitCharEncodingHandlers(); - if (handler == NULL) { + if ((handler == NULL) || (handlers == NULL)) { xmlEncodingErr(XML_I18N_NO_HANDLER, "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); return; @@ -1458,6 +1515,10 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) { if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("ebcdic"); if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("EBCDIC-US"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("IBM-037"); + if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_UCS4BE: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); @@ -1548,10 +1609,10 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) { handler = xmlFindCharEncodingHandler("EUC-JP"); if (handler != NULL) return(handler); break; - default: + default: break; } - + #ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "No handler found for encoding %d\n", enc); @@ -1576,6 +1637,10 @@ xmlFindCharEncodingHandler(const char *name) { xmlCharEncodingHandlerPtr enc; iconv_t icv_in, icv_out; #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + xmlCharEncodingHandlerPtr encu; + uconv_t *ucv_in, *ucv_out; +#endif /* LIBXML_ICU_ENABLED */ char upper[100]; int i; @@ -1600,14 +1665,17 @@ xmlFindCharEncodingHandler(const char *name) { } upper[i] = 0; - for (i = 0;i < nbCharEncodingHandler; i++) - if (!strcmp(upper, handlers[i]->name)) { + if (handlers != NULL) { + for (i = 0;i < nbCharEncodingHandler; i++) { + if (!strcmp(upper, handlers[i]->name)) { #ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "Found registered handler for encoding %s\n", name); + xmlGenericError(xmlGenericErrorContext, + "Found registered handler for encoding %s\n", name); #endif - return(handlers[i]); - } + return(handlers[i]); + } + } + } #ifdef LIBXML_ICONV_ENABLED /* check whether iconv can handle this */ @@ -1627,6 +1695,7 @@ xmlFindCharEncodingHandler(const char *name) { iconv_close(icv_out); return(NULL); } + memset(enc, 0, sizeof(xmlCharEncodingHandler)); enc->name = xmlMemStrdup(name); enc->input = NULL; enc->output = NULL; @@ -1642,6 +1711,36 @@ xmlFindCharEncodingHandler(const char *name) { "iconv : problems with filters for '%s'\n", name); } #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + /* check whether icu can handle this */ + ucv_in = openIcuConverter(name, 1); + ucv_out = openIcuConverter(name, 0); + if (ucv_in != NULL && ucv_out != NULL) { + encu = (xmlCharEncodingHandlerPtr) + xmlMalloc(sizeof(xmlCharEncodingHandler)); + if (encu == NULL) { + closeIcuConverter(ucv_in); + closeIcuConverter(ucv_out); + return(NULL); + } + memset(encu, 0, sizeof(xmlCharEncodingHandler)); + encu->name = xmlMemStrdup(name); + encu->input = NULL; + encu->output = NULL; + encu->uconv_in = ucv_in; + encu->uconv_out = ucv_out; +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Found ICU converter handler for encoding %s\n", name); +#endif + return encu; + } else if (ucv_in != NULL || ucv_out != NULL) { + closeIcuConverter(ucv_in); + closeIcuConverter(ucv_out); + xmlEncodingErr(XML_ERR_INTERNAL_ERROR, + "ICU converter : problems with filters for '%s'\n", name); + } +#endif /* LIBXML_ICU_ENABLED */ #ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, @@ -1679,12 +1778,12 @@ xmlFindCharEncodingHandler(const char *name) { * @in: a pointer to an array of ISO Latin 1 chars * @inlen: the length of @in * - * Returns 0 if success, or + * Returns 0 if success, or * -1 by lack of space, or * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or * -3 if there the last byte can't form a single output char. - * + * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. * The value of @outlen after return is the number of ocetes consumed. @@ -1703,7 +1802,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, } icv_inlen = *inlen; icv_outlen = *outlen; - ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); + ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); *inlen -= icv_inlen; *outlen -= icv_outlen; if ((icv_inlen != 0) || (ret == -1)) { @@ -1732,27 +1831,97 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, /************************************************************************ * * + * ICU based generic conversion functions * + * * + ************************************************************************/ + +#ifdef LIBXML_ICU_ENABLED +/** + * xmlUconvWrapper: + * @cd: ICU uconverter data structure + * @toUnicode : non-zero if toUnicode. 0 otherwise. + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of ISO Latin 1 chars + * @inlen: the length of @in + * + * Returns 0 if success, or + * -1 by lack of space, or + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + * -3 if there the last byte can't form a single output char. + * + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictable. + * The value of @outlen after return is the number of ocetes consumed. + */ +static int +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, + const unsigned char *in, int *inlen) { + const char *ucv_in = (const char *) in; + char *ucv_out = (char *) out; + UErrorCode err = U_ZERO_ERROR; + + if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { + if (outlen != NULL) *outlen = 0; + return(-1); + } + + /* + * TODO(jungshik) + * 1. is ucnv_convert(To|From)Algorithmic better? + * 2. had we better use an explicit pivot buffer? + * 3. error returned comes from 'fromUnicode' only even + * when toUnicode is true ! + */ + if (toUnicode) { + /* encoding => UTF-16 => UTF-8 */ + ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, + 0, TRUE, &err); + } else { + /* UTF-8 => UTF-16 => encoding */ + ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, + 0, TRUE, &err); + } + *inlen = ucv_in - (const char*) in; + *outlen = ucv_out - (char *) out; + if (U_SUCCESS(err)) + return 0; + if (err == U_BUFFER_OVERFLOW_ERROR) + return -1; + if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) + return -2; + /* if (err == U_TRUNCATED_CHAR_FOUND) */ + return -3; +} +#endif /* LIBXML_ICU_ENABLED */ + +/************************************************************************ + * * * The real API used by libxml for on-the-fly conversion * * * ************************************************************************/ /** - * xmlCharEncFirstLine: + * xmlCharEncFirstLineInt: * @handler: char enconding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input - * + * @len: number of bytes to convert for the first line, or -1 + * * Front-end for the encoding handler input function, but handle only * the very first line, i.e. limit itself to 45 chars. - * - * Returns the number of byte written if success, or + * + * Returns the number of byte written if success, or * -1 general error * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or */ int -xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, - xmlBufferPtr in) { +xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, + xmlBufferPtr in, int len) { int ret = -2; int written; int toconv; @@ -1761,19 +1930,28 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, if (out == NULL) return(-1); if (in == NULL) return(-1); - written = out->size - out->use; + /* calculate space available */ + written = out->size - out->use - 1; /* count '\0' */ toconv = in->use; - if (toconv * 2 >= written) { - xmlBufferGrow(out, toconv); - written = out->size - out->use - 1; - } - /* * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 * 45 chars should be sufficient to reach the end of the encoding * declaration without going too far inside the document content. + * on UTF-16 this means 90bytes, on UCS4 this means 180 + * The actual value depending on guessed encoding is passed as @len + * if provided */ - written = 45; + if (len >= 0) { + if (toconv > len) + toconv = len; + } else { + if (toconv > 180) + toconv = 180; + } + if (toconv * 2 >= written) { + xmlBufferGrow(out, toconv * 2); + written = out->size - out->use - 1; + } if (handler->input != NULL) { ret = handler->input(&out->content[out->use], &written, @@ -1792,6 +1970,16 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, if (ret == -1) ret = -3; } #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (handler->uconv_in != NULL) { + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], + &written, in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + if (ret == -1) ret = -3; + } +#endif /* LIBXML_ICU_ENABLED */ #ifdef DEBUG_ENCODING switch (ret) { case 0: @@ -1824,14 +2012,280 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, } /** + * xmlCharEncFirstLine: + * @handler: char enconding transformation data structure + * @out: an xmlBuffer for the output. + * @in: an xmlBuffer for the input + * + * Front-end for the encoding handler input function, but handle only + * the very first line, i.e. limit itself to 45 chars. + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, + xmlBufferPtr in) { + return(xmlCharEncFirstLineInt(handler, out, in, -1)); +} + +/** + * xmlCharEncFirstLineInput: + * @input: a parser input buffer + * @len: number of bytes to convert for the first line, or -1 + * + * Front-end for the encoding handler input function, but handle only + * the very first line. Point is that this is based on autodetection + * of the encoding and once that first line is converted we may find + * out that a different decoder is needed to process the input. + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) +{ + int ret = -2; + size_t written; + size_t toconv; + int c_in; + int c_out; + xmlBufPtr in; + xmlBufPtr out; + + if ((input == NULL) || (input->encoder == NULL) || + (input->buffer == NULL) || (input->raw == NULL)) + return (-1); + out = input->buffer; + in = input->raw; + + toconv = xmlBufUse(in); + if (toconv == 0) + return (0); + written = xmlBufAvail(out) - 1; /* count '\0' */ + /* + * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 + * 45 chars should be sufficient to reach the end of the encoding + * declaration without going too far inside the document content. + * on UTF-16 this means 90bytes, on UCS4 this means 180 + * The actual value depending on guessed encoding is passed as @len + * if provided + */ + if (len >= 0) { + if (toconv > (unsigned int) len) + toconv = len; + } else { + if (toconv > 180) + toconv = 180; + } + if (toconv * 2 >= written) { + xmlBufGrow(out, toconv * 2); + written = xmlBufAvail(out) - 1; + } + if (written > 360) + written = 360; + + c_in = toconv; + c_out = written; + if (input->encoder->input != NULL) { + ret = input->encoder->input(xmlBufEnd(out), &c_out, + xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + } +#ifdef LIBXML_ICONV_ENABLED + else if (input->encoder->iconv_in != NULL) { + ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), + &c_out, xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + if (ret == -1) + ret = -3; + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (input->encoder->uconv_in != NULL) { + ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), + &c_out, xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + if (ret == -1) + ret = -3; + } +#endif /* LIBXML_ICU_ENABLED */ + switch (ret) { + case 0: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input\n", + c_in, c_out); +#endif + break; + case -1: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input, %d left\n", + c_in, c_out, (int)xmlBufUse(in)); +#endif + break; + case -3: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input, %d left\n", + c_in, c_out, (int)xmlBufUse(in)); +#endif + break; + case -2: { + char buf[50]; + const xmlChar *content = xmlBufContent(in); + + snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", + content[0], content[1], + content[2], content[3]); + buf[49] = 0; + xmlEncodingErr(XML_I18N_CONV_FAILED, + "input conversion failed due to input error, bytes %s\n", + buf); + } + } + /* + * Ignore when input buffer is not on a boundary + */ + if (ret == -3) ret = 0; + if (ret == -1) ret = 0; + return(ret); +} + +/** + * xmlCharEncInput: + * @input: a parser input buffer + * @flush: try to flush all the raw buffer + * + * Generic front-end for the encoding handler on parser input + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncInput(xmlParserInputBufferPtr input, int flush) +{ + int ret = -2; + size_t written; + size_t toconv; + int c_in; + int c_out; + xmlBufPtr in; + xmlBufPtr out; + + if ((input == NULL) || (input->encoder == NULL) || + (input->buffer == NULL) || (input->raw == NULL)) + return (-1); + out = input->buffer; + in = input->raw; + + toconv = xmlBufUse(in); + if (toconv == 0) + return (0); + if ((toconv > 64 * 1024) && (flush == 0)) + toconv = 64 * 1024; + written = xmlBufAvail(out); + if (written > 0) + written--; /* count '\0' */ + if (toconv * 2 >= written) { + xmlBufGrow(out, toconv * 2); + written = xmlBufAvail(out); + if (written > 0) + written--; /* count '\0' */ + } + if ((written > 128 * 1024) && (flush == 0)) + written = 128 * 1024; + + c_in = toconv; + c_out = written; + if (input->encoder->input != NULL) { + ret = input->encoder->input(xmlBufEnd(out), &c_out, + xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + } +#ifdef LIBXML_ICONV_ENABLED + else if (input->encoder->iconv_in != NULL) { + ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), + &c_out, xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + if (ret == -1) + ret = -3; + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (input->encoder->uconv_in != NULL) { + ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), + &c_out, xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + if (ret == -1) + ret = -3; + } +#endif /* LIBXML_ICU_ENABLED */ + switch (ret) { + case 0: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input\n", + c_in, c_out); +#endif + break; + case -1: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input, %d left\n", + c_in, c_out, (int)xmlBufUse(in)); +#endif + break; + case -3: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input, %d left\n", + c_in, c_out, (int)xmlBufUse(in)); +#endif + break; + case -2: { + char buf[50]; + const xmlChar *content = xmlBufContent(in); + + snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", + content[0], content[1], + content[2], content[3]); + buf[49] = 0; + xmlEncodingErr(XML_I18N_CONV_FAILED, + "input conversion failed due to input error, bytes %s\n", + buf); + } + } + /* + * Ignore when input buffer is not on a boundary + */ + if (ret == -3) + ret = 0; + return (c_out? c_out : ret); +} + +/** * xmlCharEncInFunc: * @handler: char encoding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input - * + * * Generic front-end for the encoding handler input function - * - * Returns the number of byte written if success, or + * + * Returns the number of byte written if success, or * -1 general error * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or @@ -1854,7 +2308,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, toconv = in->use; if (toconv == 0) return (0); - written = out->size - out->use; + written = out->size - out->use -1; /* count '\0' */ if (toconv * 2 >= written) { xmlBufferGrow(out, out->size + toconv * 2); written = out->size - out->use - 1; @@ -1877,6 +2331,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, ret = -3; } #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (handler->uconv_in != NULL) { + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], + &written, in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + if (ret == -1) + ret = -3; + } +#endif /* LIBXML_ICU_ENABLED */ switch (ret) { case 0: #ifdef DEBUG_ENCODING @@ -1902,7 +2367,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, case -2: { char buf[50]; - snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", + snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", in->content[0], in->content[1], in->content[2], in->content[3]); buf[49] = 0; @@ -1919,20 +2384,251 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, return (written? written : ret); } +#ifdef LIBXML_OUTPUT_ENABLED +/** + * xmlCharEncOutput: + * @output: a parser output buffer + * @init: is this an initialization call without data + * + * Generic front-end for the encoding handler on parser output + * a first call with @init == 1 has to be made first to initiate the + * output in case of non-stateless encoding needing to initiate their + * state or the output (like the BOM in UTF16). + * In case of UTF8 sequence conversion errors for the given encoder, + * the content will be automatically remapped to a CharRef sequence. + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncOutput(xmlOutputBufferPtr output, int init) +{ + int ret = -2; + size_t written; + size_t writtentot = 0; + size_t toconv; + int c_in; + int c_out; + xmlBufPtr in; + xmlBufPtr out; + int charref_len = 0; + + if ((output == NULL) || (output->encoder == NULL) || + (output->buffer == NULL) || (output->conv == NULL)) + return (-1); + out = output->conv; + in = output->buffer; + +retry: + + written = xmlBufAvail(out); + if (written > 0) + written--; /* count '\0' */ + + /* + * First specific handling of the initialization call + */ + if (init) { + c_in = 0; + c_out = written; + if (output->encoder->output != NULL) { + ret = output->encoder->output(xmlBufEnd(out), &c_out, + NULL, &c_in); + if (ret > 0) /* Gennady: check return value */ + xmlBufAddLen(out, c_out); + } +#ifdef LIBXML_ICONV_ENABLED + else if (output->encoder->iconv_out != NULL) { + ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), + &c_out, NULL, &c_in); + xmlBufAddLen(out, c_out); + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (output->encoder->uconv_out != NULL) { + ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), + &c_out, NULL, &c_in); + xmlBufAddLen(out, c_out); + } +#endif /* LIBXML_ICU_ENABLED */ +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "initialized encoder\n"); +#endif + return(0); + } + + /* + * Conversion itself. + */ + toconv = xmlBufUse(in); + if (toconv == 0) + return (0); + if (toconv > 64 * 1024) + toconv = 64 * 1024; + if (toconv * 4 >= written) { + xmlBufGrow(out, toconv * 4); + written = xmlBufAvail(out) - 1; + } + if (written > 256 * 1024) + written = 256 * 1024; + + c_in = toconv; + c_out = written; + if (output->encoder->output != NULL) { + ret = output->encoder->output(xmlBufEnd(out), &c_out, + xmlBufContent(in), &c_in); + if (c_out > 0) { + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + writtentot += c_out; + } + } +#ifdef LIBXML_ICONV_ENABLED + else if (output->encoder->iconv_out != NULL) { + ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), + &c_out, xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + writtentot += c_out; + if (ret == -1) { + if (c_out > 0) { + /* + * Can be a limitation of iconv + */ + charref_len = 0; + goto retry; + } + ret = -3; + } + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (output->encoder->uconv_out != NULL) { + ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), + &c_out, xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + writtentot += c_out; + if (ret == -1) { + if (c_out > 0) { + /* + * Can be a limitation of uconv + */ + charref_len = 0; + goto retry; + } + ret = -3; + } + } +#endif /* LIBXML_ICU_ENABLED */ + else { + xmlEncodingErr(XML_I18N_NO_OUTPUT, + "xmlCharEncOutFunc: no output function !\n", NULL); + return(-1); + } + + if (ret >= 0) output += ret; + + /* + * Attempt to handle error cases + */ + switch (ret) { + case 0: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of output\n", + c_in, c_out); +#endif + break; + case -1: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "output conversion failed by lack of space\n"); +#endif + break; + case -3: +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", + c_in, c_out, (int) xmlBufUse(in)); +#endif + break; + case -2: { + int len = (int) xmlBufUse(in); + xmlChar *content = xmlBufContent(in); + int cur; + + cur = xmlGetUTF8Char(content, &len); + if ((charref_len != 0) && (c_out < charref_len)) { + /* + * We attempted to insert a character reference and failed. + * Undo what was written and skip the remaining charref. + */ + xmlBufErase(out, c_out); + writtentot -= c_out; + xmlBufShrink(in, charref_len - c_out); + charref_len = 0; + + ret = -1; + break; + } else if (cur > 0) { + xmlChar charref[20]; + +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "handling output conversion error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + content[0], content[1], + content[2], content[3]); +#endif + /* + * Removes the UTF8 sequence, and replace it by a charref + * and continue the transcoding phase, hoping the error + * did not mangle the encoder state. + */ + charref_len = snprintf((char *) &charref[0], sizeof(charref), + "&#%d;", cur); + xmlBufShrink(in, len); + xmlBufAddHead(in, charref, -1); + + goto retry; + } else { + char buf[50]; + + snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", + content[0], content[1], + content[2], content[3]); + buf[49] = 0; + xmlEncodingErr(XML_I18N_CONV_FAILED, + "output conversion failed due to conv error, bytes %s\n", + buf); + if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) + content[0] = ' '; + } + break; + } + } + return(ret); +} +#endif + /** * xmlCharEncOutFunc: * @handler: char enconding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input - * + * * Generic front-end for the encoding handler output function - * a first call with @in == NULL has to be made firs to initiate the + * a first call with @in == NULL has to be made firs to initiate the * output in case of non-stateless encoding needing to initiate their * state or the output (like the BOM in UTF16). * In case of UTF8 sequence conversion errors for the given encoder, * the content will be automatically remapped to a CharRef sequence. - * - * Returns the number of byte written if success, or + * + * Returns the number of byte written if success, or * -1 general error * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or @@ -1945,12 +2641,13 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, int writtentot = 0; int toconv; int output = 0; + int charref_len = 0; if (handler == NULL) return(-1); if (out == NULL) return(-1); retry: - + written = out->size - out->use; if (written > 0) @@ -1977,6 +2674,15 @@ retry: out->content[out->use] = 0; } #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (handler->uconv_out != NULL) { + ret = xmlUconvWrapper(handler->uconv_out, 0, + &out->content[out->use], + &written, NULL, &toconv); + out->use += written; + out->content[out->use] = 0; + } +#endif /* LIBXML_ICU_ENABLED */ #ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "initialized encoder\n"); @@ -1990,16 +2696,18 @@ retry: toconv = in->use; if (toconv == 0) return(0); - if (toconv * 2 >= written) { - xmlBufferGrow(out, toconv * 2); + if (toconv * 4 >= written) { + xmlBufferGrow(out, toconv * 4); written = out->size - out->use - 1; } if (handler->output != NULL) { ret = handler->output(&out->content[out->use], &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - writtentot += written; + if (written > 0) { + xmlBufferShrink(in, toconv); + out->use += written; + writtentot += written; + } out->content[out->use] = 0; } #ifdef LIBXML_ICONV_ENABLED @@ -2015,12 +2723,34 @@ retry: /* * Can be a limitation of iconv */ + charref_len = 0; goto retry; } ret = -3; } } #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (handler->uconv_out != NULL) { + ret = xmlUconvWrapper(handler->uconv_out, 0, + &out->content[out->use], + &written, in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + writtentot += written; + out->content[out->use] = 0; + if (ret == -1) { + if (written > 0) { + /* + * Can be a limitation of iconv + */ + charref_len = 0; + goto retry; + } + ret = -3; + } + } +#endif /* LIBXML_ICU_ENABLED */ else { xmlEncodingErr(XML_I18N_NO_OUTPUT, "xmlCharEncOutFunc: no output function !\n", NULL); @@ -2058,7 +2788,19 @@ retry: int cur; cur = xmlGetUTF8Char(utf, &len); - if (cur > 0) { + if ((charref_len != 0) && (written < charref_len)) { + /* + * We attempted to insert a character reference and failed. + * Undo what was written and skip the remaining charref. + */ + out->use -= written; + writtentot -= written; + xmlBufferShrink(in, charref_len - written); + charref_len = 0; + + ret = -1; + break; + } else if (cur > 0) { xmlChar charref[20]; #ifdef DEBUG_ENCODING @@ -2074,7 +2816,8 @@ retry: * and continue the transcoding phase, hoping the error * did not mangle the encoder state. */ - snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur); + charref_len = snprintf((char *) &charref[0], sizeof(charref), + "&#%d;", cur); xmlBufferShrink(in, len); xmlBufferAddHead(in, charref, -1); @@ -2082,7 +2825,7 @@ retry: } else { char buf[50]; - snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", + snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", in->content[0], in->content[1], in->content[2], in->content[3]); buf[49] = 0; @@ -2101,7 +2844,7 @@ retry: /** * xmlCharEncCloseFunc: * @handler: char enconding transformation data structure - * + * * Generic front-end for encoding handler close function * * Returns 0 if success, or -1 in case of error @@ -2109,17 +2852,27 @@ retry: int xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { int ret = 0; + int tofree = 0; + int i, handler_in_list = 0; + if (handler == NULL) return(-1); if (handler->name == NULL) return(-1); + if (handlers != NULL) { + for (i = 0;i < nbCharEncodingHandler; i++) { + if (handler == handlers[i]) { + handler_in_list = 1; + break; + } + } + } #ifdef LIBXML_ICONV_ENABLED /* * Iconv handlers can be used only once, free the whole block. * and the associated icon resources. */ - if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { - if (handler->name != NULL) - xmlFree(handler->name); - handler->name = NULL; + if ((handler_in_list == 0) && + ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) { + tofree = 1; if (handler->iconv_out != NULL) { if (iconv_close(handler->iconv_out)) ret = -1; @@ -2130,9 +2883,29 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { ret = -1; handler->iconv_in = NULL; } - xmlFree(handler); } #endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + if ((handler_in_list == 0) && + ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) { + tofree = 1; + if (handler->uconv_out != NULL) { + closeIcuConverter(handler->uconv_out); + handler->uconv_out = NULL; + } + if (handler->uconv_in != NULL) { + closeIcuConverter(handler->uconv_in); + handler->uconv_in = NULL; + } + } +#endif + if (tofree) { + /* free up only dynamic handlers iconv/uconv */ + if (handler->name != NULL) + xmlFree(handler->name); + handler->name = NULL; + xmlFree(handler); + } #ifdef DEBUG_ENCODING if (ret) xmlGenericError(xmlGenericErrorContext, @@ -2162,7 +2935,7 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { long xmlByteConsumed(xmlParserCtxtPtr ctxt) { xmlParserInputPtr in; - + if (ctxt == NULL) return(-1); in = ctxt->input; if (in == NULL) return(-1); @@ -2208,6 +2981,23 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { cur += toconv; } while (ret == -2); #endif +#ifdef LIBXML_ICU_ENABLED + } else if (handler->uconv_out != NULL) { + do { + toconv = in->end - cur; + written = 32000; + ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], + &written, cur, &toconv); + if (ret < 0) { + if (written > 0) + ret = -2; + else + return(-1); + } + unused += written; + cur += toconv; + } while (ret == -2); +#endif } else { /* could not find a converter */ return(-1); @@ -2220,7 +3010,7 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { return(in->consumed + (in->cur - in->base)); } -#ifndef LIBXML_ICONV_ENABLED +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) #ifdef LIBXML_ISO8859X_ENABLED /** @@ -2246,6 +3036,7 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, const unsigned char* outstart = out; const unsigned char* inend; const unsigned char* instart = in; + const unsigned char* processed = in; if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (xlattable == NULL)) @@ -2262,81 +3053,82 @@ UTF8ToISO8859x(unsigned char* out, int *outlen, while (in < inend) { unsigned char d = *in++; if (d < 0x80) { - *out++ = d; + *out++ = d; } else if (d < 0xC0) { /* trailing byte in leading position */ *outlen = out - outstart; - *inlen = in - instart - 1; + *inlen = processed - instart; return(-2); } else if (d < 0xE0) { unsigned char c; if (!(in < inend)) { /* trailing byte not in input buffer */ *outlen = out - outstart; - *inlen = in - instart - 1; - return(-2); + *inlen = processed - instart; + return(-3); } c = *in++; if ((c & 0xC0) != 0x80) { /* not a trailing byte */ *outlen = out - outstart; - *inlen = in - instart - 2; + *inlen = processed - instart; return(-2); } - c = c & 0x3F; + c = c & 0x3F; d = d & 0x1F; d = xlattable [48 + c + xlattable [d] * 64]; if (d == 0) { /* not in character set */ *outlen = out - outstart; - *inlen = in - instart - 2; + *inlen = processed - instart; return(-2); } - *out++ = d; + *out++ = d; } else if (d < 0xF0) { unsigned char c1; unsigned char c2; if (!(in < inend - 1)) { /* trailing bytes not in input buffer */ *outlen = out - outstart; - *inlen = in - instart - 1; - return(-2); + *inlen = processed - instart; + return(-3); } c1 = *in++; if ((c1 & 0xC0) != 0x80) { /* not a trailing byte (c1) */ *outlen = out - outstart; - *inlen = in - instart - 2; + *inlen = processed - instart; return(-2); } c2 = *in++; if ((c2 & 0xC0) != 0x80) { /* not a trailing byte (c2) */ *outlen = out - outstart; - *inlen = in - instart - 2; + *inlen = processed - instart; return(-2); } - c1 = c1 & 0x3F; - c2 = c2 & 0x3F; + c1 = c1 & 0x3F; + c2 = c2 & 0x3F; d = d & 0x0F; - d = xlattable [48 + c2 + xlattable [48 + c1 + - xlattable [32 + d] * 64] * 64]; + d = xlattable [48 + c2 + xlattable [48 + c1 + + xlattable [32 + d] * 64] * 64]; if (d == 0) { /* not in character set */ *outlen = out - outstart; - *inlen = in - instart - 3; + *inlen = processed - instart; return(-2); } - *out++ = d; + *out++ = d; } else { /* cannot transcode >= U+010000 */ *outlen = out - outstart; - *inlen = in - instart - 1; + *inlen = processed - instart; return(-2); } + processed = in; } *outlen = out - outstart; - *inlen = in - instart; + *inlen = processed - instart; return(*outlen); } @@ -2370,16 +3162,16 @@ ISO8859xToUTF8(unsigned char* out, int *outlen, outend = out + *outlen; inend = in + *inlen; instop = inend; - c = *in; - while (in < inend && out < outend - 1) { - if (c >= 0x80) { - c = unicodetable [c - 0x80]; + + while ((in < inend) && (out < outend - 2)) { + if (*in >= 0x80) { + c = unicodetable [*in - 0x80]; if (c == 0) { /* undefined code point */ *outlen = out - outstart; *inlen = in - instart; return (-1); - } + } if (c < 0x800) { *out++ = ((c >> 6) & 0x1F) | 0xC0; *out++ = (c & 0x3F) | 0x80; @@ -2387,48 +3179,47 @@ ISO8859xToUTF8(unsigned char* out, int *outlen, *out++ = ((c >> 12) & 0x0F) | 0xE0; *out++ = ((c >> 6) & 0x3F) | 0x80; *out++ = (c & 0x3F) | 0x80; - } + } ++in; - c = *in; } - if (instop - in > outend - out) instop = in + (outend - out); - while (c < 0x80 && in < instop) { - *out++ = c; - ++in; - c = *in; + if (instop - in > outend - out) instop = in + (outend - out); + while ((*in < 0x80) && (in < instop)) { + *out++ = *in++; } - } - if (in < inend && out < outend && c < 0x80) { - *out++ = c; - ++in; + } + if ((in < inend) && (out < outend) && (*in < 0x80)) { + *out++ = *in++; + } + if ((in < inend) && (out < outend) && (*in < 0x80)) { + *out++ = *in++; } *outlen = out - outstart; *inlen = in - instart; return (*outlen); } - + /************************************************************************ * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * ************************************************************************/ static unsigned short const xmlunicodetable_ISO8859_2 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, - 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, - 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, - 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, - 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, - 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, - 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, - 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, - 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, - 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, - 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, - 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, + 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, + 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, + 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, + 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, + 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, + 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, + 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, + 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, + 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, + 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, + 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, }; static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { @@ -2462,22 +3253,22 @@ static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_3 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, - 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, - 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, - 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, - 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, - 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, - 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, + 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, + 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, + 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, + 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, + 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, + 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, }; static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { @@ -2515,22 +3306,22 @@ static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_4 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, - 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, - 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, - 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, - 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, - 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, - 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, - 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, - 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, - 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, - 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, + 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, + 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, + 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, + 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, + 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, + 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, + 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, + 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, + 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, }; static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { @@ -2564,22 +3355,22 @@ static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_5 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, - 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, - 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, - 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, - 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, - 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, - 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, - 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, - 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, - 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, - 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, - 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, + 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, + 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, }; static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { @@ -2613,22 +3404,22 @@ static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_6 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, - 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, - 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, - 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, - 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, - 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, - 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, + 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, + 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, + 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, + 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, + 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }; static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { @@ -2658,22 +3449,22 @@ static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_7 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, - 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, - 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, - 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, - 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, - 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, - 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, - 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, - 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, - 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, + 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, + 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, + 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, + 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, + 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, }; static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { @@ -2711,22 +3502,22 @@ static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_8 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, - 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, - 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, - 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, - 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, - 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, + 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, + 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, + 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, + 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, }; static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { @@ -2764,22 +3555,22 @@ static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_9 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, - 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, }; static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { @@ -2809,22 +3600,22 @@ static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_10 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, - 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, - 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, - 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, - 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, - 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, - 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, - 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, - 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, - 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, + 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, + 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, + 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, + 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, + 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, + 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, + 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, + 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, }; static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { @@ -2862,22 +3653,22 @@ static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_11 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, - 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, - 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, - 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, - 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, - 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, - 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, - 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, - 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, - 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, - 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, - 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, + 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, + 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, + 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, + 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, + 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, + 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, + 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, + 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, + 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, + 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, + 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, }; static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { @@ -2911,22 +3702,22 @@ static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_13 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, - 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, - 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, - 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, - 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, - 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, - 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, - 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, - 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, - 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, - 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, + 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, + 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, + 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, + 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, + 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, + 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, + 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, + 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, + 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, + 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, }; static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { @@ -2964,22 +3755,22 @@ static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_14 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, - 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, - 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, - 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, + 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, + 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, + 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, }; static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { @@ -3029,22 +3820,22 @@ static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_15 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, - 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, - 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, + 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, + 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, }; static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { @@ -3078,22 +3869,22 @@ static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { }; static unsigned short const xmlunicodetable_ISO8859_16 [128] = { - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, - 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, - 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, - 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, - 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, - 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, - 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, + 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, + 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, + 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, + 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, + 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, + 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, }; static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { @@ -3292,4 +4083,3 @@ xmlRegisterCharEncodingHandlersISO8859x (void) { #define bottom_encoding #include "elfgcchack.h" - |