diff options
author | William M. Brack <wbrack@src.gnome.org> | 2003-11-28 09:39:10 +0000 |
---|---|---|
committer | William M. Brack <wbrack@src.gnome.org> | 2003-11-28 09:39:10 +0000 |
commit | f9415e4989914e24188e4644f39bdc41bf25f7f3 (patch) | |
tree | 891bfbd76964862953636b22e29fc12080ad14f1 /include | |
parent | ae8c9642d80649d82045702855a3a4b9f6540e23 (diff) | |
download | libxml2-f9415e4989914e24188e4644f39bdc41bf25f7f3.tar.gz |
Enhanced the handling of UTF-16, UTF-16LE and UTF-16BE encodings.
* encoding.c, include/libxml/encoding.h: Enhanced the handling of UTF-16,
UTF-16LE and UTF-16BE encodings. Now UTF-16 output is handled internally
by default, with proper BOM and UTF-16LE encoding. Native UTF-16LE and
UTF-16BE encoding will not generate a BOM on output, and will be
automatically recognized on input.
* test/utf16lebom.xml, test/utf16bebom.xml, result/utf16?ebom*: added
regression tests for above.
Diffstat (limited to 'include')
-rw-r--r-- | include/libxml/encoding.h | 27 |
1 files changed, 15 insertions, 12 deletions
```diff
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 74b4c4ab..b49f2741 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -35,19 +35,22 @@ extern "C" {
  * xmlCharEncoding:
  *
  * Predefined values for some standard encodings.
- * Libxml don't do beforehand translation on UTF8, ISOLatinX.
- * It also support UTF16 (LE and BE) by default.
+ * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
+ * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
  *
  * Anything else would have to be translated to UTF8 before being
  * given to the parser itself. The BOM for UTF16 and the encoding
  * declaration are looked at and a converter is looked for at that
- * point. If not found the parser stops here as asked by the XML REC
- * Converter can be registered by the user using xmlRegisterCharEncodingHandler
+ * point. If not found the parser stops here as asked by the XML REC. A
+ * converter can be registered by the user using xmlRegisterCharEncodingHandler
  * but the current form doesn't allow stateful transcoding (a serious
  * problem agreed !). If iconv has been found it will be used
  * automatically and allow stateful transcoding, the simplest is then
- * to be sure to enable icon and to provide iconv libs for the encoding
+ * to be sure to enable iconv and to provide iconv libs for the encoding
  * support needed.
+ *
+ * Note that the generic "UTF-16" is not a predefined value. Instead, only
+ * the specific UTF-16LE and UTF-16BE are present.
  */
 typedef enum {
     XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
@@ -86,10 +89,10 @@ typedef enum {
  * Take a block of chars in the original encoding and try to convert
  * it to an UTF-8 block of chars out.
  *
- * Returns the number of byte written, or -1 by lack of space, or -2
+ * Returns the number of bytes written, -1 if lack of space, or -2
  * if the transcoding failed.
  * The value of @inlen after return is the number of octets consumed
- * as the return value is positive, else unpredictiable.
+ * if the return value is positive, else unpredictiable.
  * The value of @outlen after return is the number of octets consumed.
  */
 typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
@@ -103,15 +106,15 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
  * @in: a pointer to an array of UTF-8 chars
  * @inlen: the length of @in
  *
- * Take a block of UTF-8 chars in and try to convert it to an other
+ * Take a block of UTF-8 chars in and try to convert it to another
  * encoding.
  * Note: a first call designed to produce heading info is called with
  * in = NULL. If stateful this should also initialize the encoder state.
  *
- * Returns the number of byte written, or -1 by lack of space, or -2
+ * Returns the number of bytes written, -1 if lack of space, or -2
  * if the transcoding failed.
  * The value of @inlen after return is the number of octets consumed
- * as the return value is positive, else unpredictiable.
+ * if the return value is positive, else unpredictiable.
  * The value of @outlen after return is the number of ocetes consumed.
  */
 typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
@@ -120,7 +123,7 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
 
 /*
  * Block defining the handlers for non UTF-8 encodings.
- * If iconv is supported, there is two extra fields.
+ * If iconv is supported, there are two extra fields.
  */
 
 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
@@ -218,7 +221,7 @@ XMLPUBFUN int XMLCALL
 	xmlGetUTF8Char (const unsigned char *utf,
 			int *len);
 /*
- * exports additional "UTF-8 aware" string routines which are.
+ * Export additional string routines which are "UTF-8 aware".
  */
 
 XMLPUBFUN int XMLCALL
```