summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorWilliam M. Brack <wbrack@src.gnome.org>2003-11-28 09:39:10 +0000
committerWilliam M. Brack <wbrack@src.gnome.org>2003-11-28 09:39:10 +0000
commitf9415e4989914e24188e4644f39bdc41bf25f7f3 (patch)
tree891bfbd76964862953636b22e29fc12080ad14f1 /include
parentae8c9642d80649d82045702855a3a4b9f6540e23 (diff)
downloadlibxml2-f9415e4989914e24188e4644f39bdc41bf25f7f3.tar.gz
Enhanced the handling of UTF-16, UTF-16LE and UTF-16BE encodings. Now
* encoding.c, include/libxml/encoding.h: Enhanced the handling of UTF-16, UTF-16LE and UTF-16BE encodings. Now UTF-16 output is handled internally by default, with proper BOM and UTF-16LE encoding. Native UTF-16LE and UTF-16BE encoding will not generate a BOM on output, and will be automatically recognized on input. * test/utf16lebom.xml, test/utf16bebom.xml, result/utf16?ebom*: added regression tests for above.
Diffstat (limited to 'include')
-rw-r--r--include/libxml/encoding.h27
1 files changed, 15 insertions, 12 deletions
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 74b4c4ab..b49f2741 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -35,19 +35,22 @@ extern "C" {
* xmlCharEncoding:
*
* Predefined values for some standard encodings.
- * Libxml don't do beforehand translation on UTF8, ISOLatinX.
- * It also support UTF16 (LE and BE) by default.
+ * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
+ * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
*
* Anything else would have to be translated to UTF8 before being
* given to the parser itself. The BOM for UTF16 and the encoding
* declaration are looked at and a converter is looked for at that
- * point. If not found the parser stops here as asked by the XML REC
- * Converter can be registered by the user using xmlRegisterCharEncodingHandler
+ * point. If not found the parser stops here as asked by the XML REC. A
+ * converter can be registered by the user using xmlRegisterCharEncodingHandler
* but the current form doesn't allow stateful transcoding (a serious
* problem agreed !). If iconv has been found it will be used
* automatically and allow stateful transcoding, the simplest is then
- * to be sure to enable icon and to provide iconv libs for the encoding
+ * to be sure to enable iconv and to provide iconv libs for the encoding
* support needed.
+ *
+ * Note that the generic "UTF-16" is not a predefined value. Instead, only
+ * the specific UTF-16LE and UTF-16BE are present.
*/
typedef enum {
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
@@ -86,10 +89,10 @@ typedef enum {
* Take a block of chars in the original encoding and try to convert
* it to an UTF-8 block of chars out.
*
- * Returns the number of byte written, or -1 by lack of space, or -2
+ * Returns the number of bytes written, -1 if lack of space, or -2
* if the transcoding failed.
* The value of @inlen after return is the number of octets consumed
- * as the return value is positive, else unpredictiable.
+ * if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets consumed.
*/
typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
@@ -103,15 +106,15 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
* @in: a pointer to an array of UTF-8 chars
* @inlen: the length of @in
*
- * Take a block of UTF-8 chars in and try to convert it to an other
+ * Take a block of UTF-8 chars in and try to convert it to another
* encoding.
* Note: a first call designed to produce heading info is called with
* in = NULL. If stateful this should also initialize the encoder state.
*
- * Returns the number of byte written, or -1 by lack of space, or -2
+ * Returns the number of bytes written, -1 if lack of space, or -2
* if the transcoding failed.
* The value of @inlen after return is the number of octets consumed
- * as the return value is positive, else unpredictiable.
+ * if the return value is positive, else unpredictable.
* The value of @outlen after return is the number of octets consumed.
*/
typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
@@ -120,7 +123,7 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
/*
* Block defining the handlers for non UTF-8 encodings.
- * If iconv is supported, there is two extra fields.
+ * If iconv is supported, there are two extra fields.
*/
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
@@ -218,7 +221,7 @@ XMLPUBFUN int XMLCALL
xmlGetUTF8Char (const unsigned char *utf,
int *len);
/*
- * exports additional "UTF-8 aware" string routines which are.
+ * Export additional string routines which are "UTF-8 aware".
*/
XMLPUBFUN int XMLCALL