1 files changed, 983 insertions, 598 deletions
diff --git a/src/xlibi18n/lcCT.c b/src/xlibi18n/lcCT.c
index 5dd2ddcc..fcaca229 100644
--- a/src/xlibi18n/lcCT.c
+++ b/src/xlibi18n/lcCT.c
@@ -31,70 +31,107 @@
  * Modifier: Takanori Tateno   FUJITSU LIMITED
  *
  */
+/*
+ *  2000
+ *  Modifier: Ivan Pascal     The XFree86 Project
+ *  Modifier: Bruno Haible    The XFree86 Project
+ */
+/* $XFree86: xc/lib/X11/lcCT.c,v 3.27 2002/10/08 23:31:35 dawes Exp $ */
 
 #include "Xlibint.h"
 #include "XlcPubI.h"
 #include <X11/Xos.h>
 #include <stdio.h>
 
-typedef struct _StateRec {
-    XlcCharSet charset;
-    XlcCharSet GL_charset;
-    XlcCharSet GR_charset;
-    XlcCharSet ext_seg_charset;
-    int ext_seg_left;
-} StateRec, *State;
 
+/* ====================== Built-in Character Sets ====================== */
+
+/*
+ * Static representation of a character set that can be used in Compound Text.
+ */
 typedef struct _CTDataRec {
-    char *name;
-    char *encoding;		/* Compound Text encoding */
+    const char *name;
+    const char *ct_sequence;	/* Compound Text encoding, ESC sequence */
 } CTDataRec, *CTData;
 
-typedef struct _CTInfoRec {
-    XlcCharSet charset;
-    int encoding_len;
-    char *encoding;		/* Compound Text encoding */
-    int ext_segment_len;
-    char *ext_segment;		/* extended segment */
-    struct _CTInfoRec *next;
-} CTInfoRec, *CTInfo;
-
 static CTDataRec default_ct_data[] =
 {
-    { "ISO8859-1:GL", "\033(B" },
-    { "ISO8859-1:GR", "\033-A" },
-    { "ISO8859-2:GR", "\033-B" },
-    { "ISO8859-3:GR", "\033-C" },
-    { "ISO8859-4:GR", "\033-D" },
-    { "ISO8859-7:GR", "\033-F" },
-    { "ISO8859-6:GR", "\033-G" },
-    { "ISO8859-8:GR", "\033-H" },
-    { "ISO8859-5:GR", "\033-L" },
-    { "ISO8859-9:GR", "\033-M" },
-    { "ISO8859-10:GR", "\033-V" },
-    { "JISX0201.1976-0:GL", "\033(J" },
-    { "JISX0201.1976-0:GR", "\033)I" },
-
-    { "GB2312.1980-0:GL", "\033$(A" },
-    { "GB2312.1980-0:GR", "\033$)A" },
-    { "JISX0208.1983-0:GL", "\033$(B" },
-    { "JISX0208.1983-0:GR", "\033$)B" },
-    { "KSC5601.1987-0:GL", "\033$(C" },
-    { "KSC5601.1987-0:GR", "\033$)C" },
-#ifdef notdef
-    { "JISX0212.1990-0:GL", "\033$(D" },
-    { "JISX0212.1990-0:GR", "\033$)D" },
-    { "CNS11643.1986-1:GL", "\033$(G" },
-    { "CNS11643.1986-1:GR", "\033$)G" },
-    { "CNS11643.1986-2:GL", "\033$(H" },
-    { "CNS11643.1986-2:GR", "\033$)H" },
+    /*                                                                    */
+    /* X11 registry name       MIME name         ISO-IR      ESC sequence */
+    /*                                                                    */
+
+    /* Registered character sets with one byte per character */
+    { "ISO8859-1:GL",       /* US-ASCII              6   */  "\033(B" },
+    { "ISO8859-1:GR",       /* ISO-8859-1          100   */  "\033-A" },
+    { "ISO8859-2:GR",       /* ISO-8859-2          101   */  "\033-B" },
+    { "ISO8859-3:GR",       /* ISO-8859-3          109   */  "\033-C" },
+    { "ISO8859-4:GR",       /* ISO-8859-4          110   */  "\033-D" },
+    { "ISO8859-5:GR",       /* ISO-8859-5          144   */  "\033-L" },
+    { "ISO8859-6:GR",       /* ISO-8859-6          127   */  "\033-G" },
+    { "ISO8859-7:GR",       /* ISO-8859-7          126   */  "\033-F" },
+    { "ISO8859-8:GR",       /* ISO-8859-8          138   */  "\033-H" },
+    { "ISO8859-9:GR",       /* ISO-8859-9          148   */  "\033-M" },
+    { "ISO8859-10:GR",      /* ISO-8859-10         157   */  "\033-V" },
+    { "ISO8859-11:GR",      /* ISO-8859-11         166   */  "\033-T" },
+    { "ISO8859-13:GR",      /* ISO-8859-13         179   */  "\033-Y" },
+    { "ISO8859-14:GR",      /* ISO-8859-14         199   */  "\033-_" },
+    { "ISO8859-15:GR",      /* ISO-8859-15         203   */  "\033-b" },
+    { "ISO8859-16:GR",      /* ISO-8859-16         226   */  "\033-f" },
+    { "JISX0201.1976-0:GL", /* ISO-646-JP           14   */  "\033(J" },
+    { "JISX0201.1976-0:GR",                                  "\033)I" },
+#if 0
+    { "TIS620-0:GR",        /* TIS-620             166   */  "\033-T" },
 #endif
-    { "TIS620.2533-1:GR", "\033-T"},
-    { "ISO10646-1", "\033%B"},
-    /* Non-Standard Character Set Encodings */
-    { "KOI8-R:GR", "\033%/1\200\210koi8-r\002"},
-    { "FCD8859-15:GR", "\033%/1\200\213fcd8859-15\002"},
-} ; 
+
+    /* Registered character sets with two bytes per character */
+    { "GB2312.1980-0:GL",   /* GB_2312-80           58   */ "\033$(A" },
+    { "GB2312.1980-0:GR",   /* GB_2312-80           58   */ "\033$)A" },
+    { "JISX0208.1983-0:GL", /* JIS_X0208-1983       87   */ "\033$(B" },
+    { "JISX0208.1983-0:GR", /* JIS_X0208-1983       87   */ "\033$)B" },
+    { "JISX0208.1990-0:GL", /* JIS_X0208-1990      168   */ "\033$(B" },
+    { "JISX0208.1990-0:GR", /* JIS_X0208-1990      168   */ "\033$)B" },
+    { "JISX0212.1990-0:GL", /* JIS_X0212-1990      159   */ "\033$(D" },
+    { "JISX0212.1990-0:GR", /* JIS_X0212-1990      159   */ "\033$)D" },
+    { "KSC5601.1987-0:GL",  /* KS_C_5601-1987      149   */ "\033$(C" },
+    { "KSC5601.1987-0:GR",  /* KS_C_5601-1987      149   */ "\033$)C" },
+    { "CNS11643.1986-1:GL", /* CNS 11643-1992 pl.1 171   */ "\033$(G" },
+    { "CNS11643.1986-1:GR", /* CNS 11643-1992 pl.1 171   */ "\033$)G" },
+    { "CNS11643.1986-2:GL", /* CNS 11643-1992 pl.2 172   */ "\033$(H" },
+    { "CNS11643.1986-2:GR", /* CNS 11643-1992 pl.2 172   */ "\033$)H" },
+    { "CNS11643.1992-3:GL", /* CNS 11643-1992 pl.3 183   */ "\033$(I" },
+    { "CNS11643.1992-3:GR", /* CNS 11643-1992 pl.3 183   */ "\033$)I" },
+    { "CNS11643.1992-4:GL", /* CNS 11643-1992 pl.4 184   */ "\033$(J" },
+    { "CNS11643.1992-4:GR", /* CNS 11643-1992 pl.4 184   */ "\033$)J" },
+    { "CNS11643.1992-5:GL", /* CNS 11643-1992 pl.5 185   */ "\033$(K" },
+    { "CNS11643.1992-5:GR", /* CNS 11643-1992 pl.5 185   */ "\033$)K" },
+    { "CNS11643.1992-6:GL", /* CNS 11643-1992 pl.6 186   */ "\033$(L" },
+    { "CNS11643.1992-6:GR", /* CNS 11643-1992 pl.6 186   */ "\033$)L" },
+    { "CNS11643.1992-7:GL", /* CNS 11643-1992 pl.7 187   */ "\033$(M" },
+    { "CNS11643.1992-7:GR", /* CNS 11643-1992 pl.7 187   */ "\033$)M" },
+
+    /* Registered encodings with a varying number of bytes per character */
+    { "ISO10646-1",         /* UTF-8               196   */ "\033%G"  },
+
+    /* Encodings without ISO-IR assigned escape sequence must be
+       defined in XLC_LOCALE files, using "\033%/1" or "\033%/2". */
+
+    /* Backward compatibility with XFree86 3.x */
+    { "ISO8859-14:GR",                                      "\033%/1" },
+    { "ISO8859-15:GR",                                      "\033%/1" },
+    /* For use by utf8 -> ctext */
+    { "BIG5-0:GLGR", "\033%/2"},
+    /* used by Emacs, but not backed by ISO-IR */
+    { "BIG5-E0:GL", "\033$(0" },
+    { "BIG5-E0:GR", "\033$)0" },
+    { "BIG5-E1:GL", "\033$(1" },
+    { "BIG5-E1:GR", "\033$)1" },
+
+};
+
+/* We represent UTF-8 as an XlcGLGR charset, not in extended segments. */
+#define UTF8_IN_EXTSEQ 0
+
+/* ======================= Parsing ESC Sequences ======================= */
 
 #define XctC0		0x0000
 #define XctHT		0x0009
@@ -104,6 +141,7 @@ static CTDataRec default_ct_data[] =
 #define XctC1		0x0080
 #define XctCSI		0x009b
 #define XctGR		0x00a0
+#define XctSTX		0x0002
 
 #define XctCntrlFunc	0x0023
 #define XctMB		0x0024
@@ -122,268 +160,282 @@ static CTDataRec default_ct_data[] =
 #define XctGL94MB	0x2428
 #define XctGR94MB	0x2429
 #define XctExtSeg	0x252f
-#define XctOtherSeg	0x2f00
-
-#define XctESCSeq	0x1b00
-#define XctCSISeq	0x9b00
-
-#define SKIP_I(str)	while (*(str) >= 0x20 && *(str) <=  0x2f) (str)++;
-#define SKIP_P(str)	while (*(str) >= 0x30 && *(str) <=  0x3f) (str)++;
-
-typedef struct {
-    XlcSide side;
-    int char_size;
-    int set_size;
-    int ext_seg_length;
-    int version;
-    CTInfo ct_info;
-} CTParseRec, *CTParse;
-
-CTDataRec *default_ct_data_list()
-{
-	return(default_ct_data);
-}
-
-size_t default_ct_data_list_num()
-{
-	size_t num = sizeof(default_ct_data) / sizeof(CTDataRec);
-	return(num);
-}
-
-static CTInfo ct_list = NULL;
-
-static CTInfo
-_XlcGetCTInfoFromEncoding(encoding, length)
-    register char *encoding;
-    register int length;
-{
-    register CTInfo ct_info;
-
-    for (ct_info = ct_list; ct_info; ct_info = ct_info->next) {
-	if (length >= ct_info->encoding_len) {
-	    if (ct_info->ext_segment) {
-		if (!strncmp(ct_info->encoding, encoding, 4) &&
-		    !strncmp(ct_info->ext_segment, encoding + 6,
-			     ct_info->ext_segment_len))
-		    return ct_info;
-	    } else if (!strncmp(ct_info->encoding, encoding,
-				ct_info->encoding_len)) {
-		return ct_info;
-	    }
-	}
-    }
-
-    return (CTInfo) NULL;
-}
+#define XctReturn	0x2540
 
+/*
+ * Parses the header of a Compound Text segment, i.e. the charset designator.
+ * The string starts at *text and has *length bytes.
+ * Return value is one of:
+ *   0 (no valid charset designator),
+ *   XctGL94, XctGR94, XctGR96, XctGL94MB, XctGR94MB,
+ *   XctLeftToRight, XctRightToLeft, XctDirectionEnd,
+ *   XctExtSeg, XctOtherCoding, XctReturn, XctIgnoreExt, XctNotIgnoreExt.
+ * If the return value is not 0, *text is incremented and *length decremented,
+ * to point past the charset designator. If the return value is one of
+ *   XctGL94, XctGR94, XctGR96, XctGL94MB, XctGR94MB,
+ *   XctExtSeg, XctOtherCoding, XctIgnoreExt, XctNotIgnoreExt,
+ * *final_byte is set to the "final byte" of the charset designator.
+ */
 static unsigned int
-_XlcParseCT(parse, text, length)
-    register CTParse parse;
-    char **text;
-    int *length;
+_XlcParseCT(
+    const char **text,
+    int *length,
+    unsigned char *final_byte)
 {
     unsigned int ret = 0;
     unsigned char ch;
-    register unsigned char *str = (unsigned char *) *text;
+    const unsigned char *str = (const unsigned char *) *text;
 
-    bzero((char *) parse, sizeof(CTParseRec));
+    *final_byte = 0;
 
+    if (*length < 1)
+        return 0;
     switch (ch = *str++) {
-	case XctESC:
-	    if (*str == XctOtherCoding && *(str + 1) == XctNonStandard
-		&& *(str + 2) >= 0x30 && *(str + 2) <= 0x3f && *length >= 6) {
-
-		/* non-standard encodings */
-		parse->side = XlcGLGR;
-		parse->set_size = 0;
-		str += 2;
-		if (*str <= 0x34) {
-		    parse->char_size = *str - 0x30;
-		    if (parse->char_size == 0) parse->char_size = 1;
-		    ret = XctExtSeg;
-		    parse->ct_info = _XlcGetCTInfoFromEncoding(*text, *length);
-		} else
-		    ret = XctOtherSeg;
-		str++;
-		parse->ext_seg_length = (*str - 128) * 128 + *(str + 1) - 128;
-		str += 2;
-
-		goto done;
-	    } else if (*str == XctCntrlFunc && *length >= 4 &&
-		       *(str + 1) >= 0x20 && *(str + 1) <= 0x2f &&
-		       (*(str + 2) == XctIgnoreExt ||
-			*(str + 2) == XctNotIgnoreExt)) {
-		
-		/* ignore extension or not */
-		str++;
-		parse->version = *str++ - 0x20;
-		ret = *str++;
-
-		goto done;
-	    }
-	    
-	    if (*str == XctMB) {	/* multiple-byte sets */
-		parse->char_size = 2;
-		str++;
-	    } else
-		parse->char_size = 1;
-	
-	    switch (*str) {
-		case XctGL94:
-		    parse->side = XlcGL;
-		    parse->set_size = 94;
-		    ret = (parse->char_size == 1) ? XctGL94 : XctGL94MB;
-		    break;
-		case XctGR94:
-		    parse->side = XlcGR;
-		    parse->set_size = 94;
-		    ret = (parse->char_size == 1) ? XctGR94 : XctGR94MB;
-		    break;
-		case XctGR96:
-		    if (parse->char_size == 1) {
-			parse->side = XlcGR;
-			parse->set_size = 96;
-			ret = XctGR96;
-		    }
-		    break;
-	    }
-	    if (ret) {
-		str++;
-		if (*str >= 0x24 && *str <= 0x2f) {	/* non-standard */
-		    ret = 0;
-		    str++;
-		}
-	    }
-
-	    SKIP_I(str)
-
-	    if (ret && *str < 0x40)			/* non-standard */
-		ret = 0;
-
-	    if (*str < 0x30 || *str > 0x7e || (char *) str - *text >= *length)
-		break;
-	    
-	    if (ret == 0)
-		ret = XctESCSeq;
-	    else {
-		if (parse->char_size == 2) {
-		    if (*str >= 0x70)
-			parse->char_size = 4;
-		    else if (*str >= 0x60)
-			parse->char_size = 3;
-		}
-		parse->ct_info = _XlcGetCTInfoFromEncoding(*text, *length);
-	    }
-	    str++;
-	    goto done;
-	case XctCSI:
+        case XctESC:
+            if (*length < 2)
+                return 0;
+            switch (ch = *str++) {
+                case XctOtherCoding:             /* % */
+                    if (*length < 3)
+                        return 0;
+                    ch = *str++;
+                    if (ch == XctNonStandard) {  /* / */
+                        if (*length < 4)
+                            return 0;
+                        ret = XctExtSeg;
+                        ch = *str++;
+                    } else if (ch == '@') {
+                        ret = XctReturn;
+                    } else {
+                        ret = XctOtherCoding;
+                    }
+                    *final_byte = ch;
+                    break;
+
+                case XctCntrlFunc:               /* # */
+                    if (*length < 4)
+                        return 0;
+                    *final_byte = *str++;
+                    switch (*str++) {
+                        case XctIgnoreExt:       /* 0 */
+                            ret = XctIgnoreExt;
+                            break;
+                        case XctNotIgnoreExt:    /* 1 */
+                            ret = XctNotIgnoreExt;
+                            break;
+                        default:
+                            ret = 0;
+                            break;
+                    }
+                    break;
+
+                case XctMB:                      /* $ */
+                    if (*length < 4)
+                        return 0;
+                    ch = *str++;
+                    switch (ch) {
+                        case XctGL94:            /* ( */
+                            ret = XctGL94MB;
+                            break;
+                        case XctGR94:            /* ) */
+                            ret = XctGR94MB;
+                            break;
+                        default:
+                            ret = 0;
+                            break;
+                    }
+                    *final_byte = *str++;
+                    break;
+
+                case XctGL94:                    /* ( */
+                    if (*length < 3)
+                        return 0;
+                    ret = XctGL94;
+                    *final_byte = *str++;
+                    break;
+                case XctGR94:                    /* ) */
+                    if (*length < 3)
+                        return 0;
+                    ret = XctGR94;
+                    *final_byte = *str++;
+                    break;
+                case XctGR96:                    /* - */
+                    if (*length < 3)
+                        return 0;
+                    ret = XctGR96;
+                    *final_byte = *str++;
+                    break;
+            }
+            break;
+        case XctCSI:
 	    /* direction */
-	    if (*str == XctLeftToRight && *(str + 1) == XctDirection) {
-		ret = XctLeftToRight;
-		str += 2;
-		goto done;
-	    } else if (*str == XctRightToLeft && *(str + 1) == XctDirection) {
-		ret = XctRightToLeft;
-		str += 2;
-		goto done;
-	    } else if (*str == XctDirectionEnd) {
-		ret = XctDirectionEnd;
-		str++;
-		goto done;
-	    }
+            if (*length < 2)
+                return 0;
+            switch (*str++) {
+                case XctLeftToRight:
+                    if (*length < 3)
+                        return 0;
+                    if (*str++ == XctDirection)
+                        ret = XctLeftToRight;
+                    break;
+                case XctRightToLeft:
+                    if (*length < 3)
+                        return 0;
+                    if (*str++ == XctDirection)
+                        ret = XctRightToLeft;
+                    break;
+                case XctDirectionEnd:
+                    ret = XctDirectionEnd;
+                    break;
+            }
+            break;
+    }
 
-	    SKIP_P(str)
-	    SKIP_I(str)
+    if (ret) {
+        *length -= (const char *) str - *text;
+        *text = (const char *) str;
+    }
+    return ret;
+}
 
-	    if (*str < 0x40 && *str > 0x7e)
-		break;
+/*
+ * Fills into a freshly created XlcCharSet the fields that can be inferred
+ * from the ESC sequence. These are side, char_size, set_size.
+ * Returns True if the charset can be used with Compound Text.
+ *
+ * Used by _XlcCreateDefaultCharSet.
+ */
+Bool
+_XlcParseCharSet(
+    XlcCharSet charset)
+{
+    unsigned int type;
+    unsigned char final_byte;
+    const char *ptr = charset->ct_sequence;
+    int length;
+    int char_size;
+    
+    if (*ptr == '\0')
+    	return False;
 
-	    ret = XctCSISeq;
-	    str++;
-	    goto done;
-    }
+    length = strlen(ptr);
 
-    if (ch & 0x80) {
-	if (ch < 0xa0)
-	    ret = XctC1;
-	else
-	    ret = XctGR;
-    } else {
-	if (ch == XctHT || ch == XctNL)
-	    ret = ch;
-	else if (ch < 0x20)
-	    ret = XctC0;
-	else
-	    ret = XctGL;
+    type = _XlcParseCT(&ptr, &length, &final_byte);
+
+    /* Check for validity and determine char_size.
+       char_size = 0 means varying number of bytes per character. */
+    switch (type) {
+        case XctGL94:
+        case XctGR94:
+        case XctGR96:
+            char_size = 1;
+            break;
+        case XctGL94MB:
+        case XctGR94MB:
+            char_size = (final_byte < 0x60 ? 2 : final_byte < 0x70 ? 3 : 4);
+            break;
+        case XctExtSeg:
+            char_size = final_byte - '0';
+            if (!(char_size >= 0 && char_size <= 4))
+                return False;
+            break;
+        case XctOtherCoding:
+            char_size = 0;
+            break;
+        default:
+            return False;
     }
 
-    return ret;
+    charset->char_size = char_size;
+
+    /* Fill in other values. */
+    switch (type) {
+        case XctGL94:
+        case XctGL94MB:
+            charset->side = XlcGL;
+            charset->set_size = 94;
+            break;
+        case XctGR94:
+        case XctGR94MB:
+            charset->side = XlcGR;
+            charset->set_size = 94;
+            break;
+        case XctGR96:
+            charset->side = XlcGR;
+            charset->set_size = 96;
+            break;
+        case XctExtSeg:
+        case XctOtherCoding:
+            charset->side = XlcGLGR;
+            charset->set_size = 0;
+            break;
+    }
+    return True;
+}
 
-done:
-    *length -= (char *) str - *text;
-    *text = (char *) str;
 
-    return ret;
-}
+/* =============== Management of the List of Character Sets =============== */
 
-XlcCharSet
-_XlcAddCT(name, encoding)
-    char *name;
-    char *encoding;
-{
-    CTInfo ct_info;
+/*
+ * Representation of a character set that can be used for Compound Text,
+ * at run time.
+ * Note: This information is not contained in the XlcCharSet, because
+ * multiple ESC sequences may be used for the same XlcCharSet.
+ */
+typedef struct _CTInfoRec {
     XlcCharSet charset;
-    CTParseRec parse;
-    char *ct_ptr = encoding;
-    int length;
+    const char *ct_sequence;	/* Compound Text ESC sequence */
     unsigned int type;
+    unsigned char final_byte;
+				/* If type == XctExtSeg: */
+    const char *ext_segment;	/* extended segment name, then '\002' */
+    int ext_segment_len;	/* length of above, including final '\002' */
 
-    length = strlen(encoding);
+    struct _CTInfoRec *next;
+} CTInfoRec, *CTInfo;
 
-    switch (type = _XlcParseCT(&parse, &ct_ptr, &length)) {
-	case XctExtSeg:
-	case XctGL94:
-	case XctGL94MB:
-	case XctGR94:
-	case XctGR94MB:
-	case XctGR96:
-	    if (parse.ct_info)		/* existed */
-		return parse.ct_info->charset;
-	    break;
-	default:
-	    return (XlcCharSet) NULL;
-    }
+/*
+ * List of character sets that can be used for Compound Text.
+ * Includes all that are listed in default_ct_data, but more can be added
+ * at runtime through _XlcAddCT.
+ */
+static CTInfo ct_list = NULL;
+static CTInfo *ct_list_end = &ct_list;
 
-    charset = _XlcCreateDefaultCharSet(name, encoding);
-    if (charset == NULL)
-	return (XlcCharSet) NULL;
-    _XlcAddCharSet(charset);
+/*
+ * Returns a Compound Text info record for an ESC sequence.
+ * The first part of the ESC sequence has already been parsed into 'type'
+ * and 'final_byte'. The remainder starts at 'text' and is at least
+ * 'text_len' bytes long (only used if type == XctExtSeg).
+ */
+static CTInfo
+_XlcGetCTInfo(
+    unsigned int type,
+    unsigned char final_byte,
+    const char *text,
+    int text_len)
+{
+    CTInfo ct_info;
 
-    ct_info = (CTInfo) Xmalloc(sizeof(CTInfoRec));
-    if (ct_info == NULL)
-	return (XlcCharSet) NULL;
-    
-    ct_info->charset = charset;
-    ct_info->encoding = charset->ct_sequence;
-    ct_info->encoding_len = strlen(ct_info->encoding);
-    if (type == XctExtSeg) {
-	ct_info->ext_segment = ct_info->encoding + 6;
-	ct_info->ext_segment_len = strlen(ct_info->ext_segment);
-    } else {
-	ct_info->ext_segment = NULL;
-	ct_info->ext_segment_len = 0;
-    }
-    ct_info->next = ct_list;
-    ct_list = ct_info;
+    for (ct_info = ct_list; ct_info; ct_info = ct_info->next)
+        if (ct_info->type == type
+            && ct_info->final_byte == final_byte
+            && (type != XctExtSeg
+                || (text_len >= ct_info->ext_segment_len
+                    && memcmp(text, ct_info->ext_segment,
+                              ct_info->ext_segment_len) == 0)))
+            return ct_info;
 
-    return charset;
+    return (CTInfo) NULL;
 }
 
+/* Returns the Compound Text info for a given XlcCharSet.
+   Returns NULL if none is found. */
 static CTInfo
-_XlcGetCTInfoFromCharSet(charset)
-    register XlcCharSet charset;
+_XlcGetCTInfoFromCharSet(
+    XlcCharSet charset)
 {
-    register CTInfo ct_info;
+    CTInfo ct_info;
 
     for (ct_info = ct_list; ct_info; ct_info = ct_info->next)
 	if (ct_info->charset == charset)
@@ -392,244 +444,445 @@ _XlcGetCTInfoFromCharSet(charset)
     return (CTInfo) NULL;
 }
 
-Bool
-_XlcParseCharSet(charset)
-    XlcCharSet charset;
+/* Registers the Compound Text sequence ct_sequence for charset 'name' (with
+   side suffix), creating the charset if needed. Returns the charset, or
+   NULL if it cannot be created or ct_sequence is no charset designator. */
+XlcCharSet
+_XlcAddCT(
+    const char *name,
+    const char *ct_sequence)
 {
-    CTParseRec parse;
-    char *ptr, *bufp, buf[BUFSIZ];
+    CTInfo ct_info, existing_info;
+    XlcCharSet charset;
+    const char *ct_ptr;
     int length;
+    unsigned int type;
+    unsigned char final_byte;
 
-    if (charset->ct_sequence == NULL)
-	return False;
-
-    ptr = charset->ct_sequence;
-    length = strlen(ptr);
-
-    (void) _XlcParseCT(&parse, &ptr, &length);
-	
-    if (charset->name) {
-	charset->xrm_name = XrmStringToQuark(charset->name);
-
-	if ((length = strlen (charset->name)) < sizeof buf) bufp = buf;
-	else bufp = Xmalloc (length + 1);
-
-	if (bufp == NULL) return False;
-	strcpy(bufp, charset->name);
-	if ((ptr = strchr(bufp, ':')))
-	    *ptr = '\0';
-	charset->xrm_encoding_name = XrmStringToQuark(bufp);
-	if (bufp != buf) Xfree (bufp);
-	charset->encoding_name = XrmQuarkToString(charset->xrm_encoding_name);
+    charset = _XlcGetCharSet(name);
+    if (charset != NULL) {
+        /* Even if the charset already exists, it is OK to register a second
+           Compound Text sequence for it. */
     } else {
-	charset->xrm_name = 0;
-	charset->encoding_name = NULL;
-	charset->xrm_encoding_name = 0;
+        /* Attempt to create the charset. */
+        charset = _XlcCreateDefaultCharSet(name, ct_sequence);
+        if (charset == NULL)
+	    return (XlcCharSet) NULL;
+        _XlcAddCharSet(charset);
     }
 
-    charset->side = parse.side;
-    charset->char_size = parse.char_size;
-    charset->set_size = parse.set_size;
+    /* Allocate a CTinfo record. */
+    length = strlen(ct_sequence);
+    ct_info = (CTInfo) Xmalloc(sizeof(CTInfoRec) + length+1);
+    if (ct_info == NULL)
+	return charset;
 
-    return True;
-}
+    ct_info->charset = charset;
+    ct_info->ct_sequence = strcpy((char *) (ct_info + 1), ct_sequence);
 
-static void init_converter();
+    /* Parse the Compound Text sequence. */
+    ct_ptr = ct_sequence;
+    type = _XlcParseCT(&ct_ptr, &length, &final_byte);
 
-Bool
-_XlcInitCTInfo()
-{
-    register XlcCharSet charset;
-    register CTData ct_data;
-    register int num;
+    ct_info->type = type;
+    ct_info->final_byte = final_byte;
 
-    if (ct_list == NULL) {
-	num = sizeof(default_ct_data) / sizeof(CTDataRec);
-	for (ct_data = default_ct_data; num-- > 0; ct_data++) {
-	    charset = _XlcAddCT(ct_data->name, ct_data->encoding);
-	    if (charset == NULL)
-		continue;
-	}
-	init_converter();
+    switch (type) {
+	case XctGL94:
+	case XctGR94:
+	case XctGR96:
+	case XctGL94MB:
+	case XctGR94MB:
+	case XctOtherCoding:
+            ct_info->ext_segment = NULL;
+            ct_info->ext_segment_len = 0;
+            break;
+	case XctExtSeg: {
+            /* By convention, the extended segment name is the encoding_name
+               in lowercase. */
+            const char *q = charset->encoding_name;
+            int n = strlen(q);
+            char *p;
+
+            /* Ensure ct_info->ext_segment_len <= 0x3fff - 6. */
+            if (n > 0x3fff - 6 - 1) {
+                Xfree(ct_info);
+                return charset;
+            }
+            p = (char *) Xmalloc(n+1);
+            if (p == NULL) {
+                Xfree(ct_info);
+                return charset;
+            }
+            ct_info->ext_segment = p;
+            ct_info->ext_segment_len = n+1;
+            for ( ; n > 0; p++, q++, n--)
+                *p = (*q >= 'A' && *q <= 'Z' ? *q - 'A' + 'a' : *q);
+            *p = XctSTX;
+            break;
+        }
+	default:
+            Xfree(ct_info);
+            return (XlcCharSet) NULL;
     }
 
-    return True;
+    /* Insert it into the list, if not already present. */
+    existing_info =
+        _XlcGetCTInfo(type, ct_info->final_byte,
+                      ct_info->ext_segment, ct_info->ext_segment_len);
+    if (existing_info == NULL) {
+        /* Append, so that among duplicate CTinfo entries for one XlcCharSet
+           the first (standard) one overrides later (user defined) ones. */
+        ct_info->next = *ct_list_end;
+        *ct_list_end = ct_info;
+        ct_list_end = &ct_info->next;	/* advance the tail link */
+    } else {
+        /* Same sequence, other charset: a conflict unless both are JISX0208. */
+        if (existing_info->charset != charset
+            && !(strncmp(existing_info->charset->name, "JISX0208", 8) == 0
+                 && strncmp(charset->name, "JISX0208", 8) == 0)) {
+            fprintf(stderr,
+                    "Xlib: charsets %s and %s have the same CT sequence\n",
+                    charset->name, existing_info->charset->name);
+            if (strcmp(charset->ct_sequence, ct_sequence) == 0)
+                charset->ct_sequence = "";
+        }
+        Xfree((char *) ct_info->ext_segment);	/* NULL unless XctExtSeg */
+        Xfree(ct_info);
+    }
+
+    return charset;
 }
 
 
-static int
-_XlcCheckCTSequence(state, ctext, ctext_len)
-    State state;
-    char **ctext;
-    int *ctext_len;
+/* ========== Converters String <--> CharSet <--> Compound Text ========== */
+
+/*
+ * Structure representing the parse state of a Compound Text string.
+ */
+typedef struct _StateRec {
+    XlcCharSet charset;		/* The charset of the current segment */
+    XlcCharSet GL_charset;	/* The charset responsible for 0x00..0x7F */
+    XlcCharSet GR_charset;	/* The charset responsible for 0x80..0xFF */
+    XlcCharSet Other_charset;	/* != NULL if currently in an other segment */
+    int ext_seg_left;		/* > 0 if currently in an extended segment */
+} StateRec, *State;
+
+
+/* Subroutine for parsing the ESC sequence at *ctext; it updates 'state' and
+   advances *ctext / *ctext_len as described by the result codes below. */
+typedef enum {
+    resOK,		/* Charset saved in 'state', sequence skipped */
+    resNotInList,	/* Charset not found, sequence skipped */
+    resNotCTSeq		/* EscSeq not recognized, pointers not changed */
+} CheckResult;
+
+static CheckResult
+_XlcCheckCTSequence(
+    State state,
+    const char **ctext,
+    int *ctext_len)
 {
     XlcCharSet charset;
-    CTParseRec parse;
     CTInfo ct_info;
-    int length;
+    const char *tmp_ctext = *ctext;
+    int tmp_ctext_len = *ctext_len;
+    unsigned int type;
+    unsigned char final_byte;
+    int ext_seg_left = 0;
 
-    _XlcParseCT(&parse, ctext, ctext_len);
-
-    ct_info = parse.ct_info;
-    if (parse.ext_seg_length > 0) {	/* XctExtSeg or XctOtherSeg */
-	if (ct_info) {
-	    length = ct_info->ext_segment_len;
-	    *ctext += length;
-	    *ctext_len -= length;
-	    state->ext_seg_left = parse.ext_seg_length - length;
-	    state->ext_seg_charset = ct_info->charset;
-	} else {
-	    state->ext_seg_left = parse.ext_seg_length;
-	    state->ext_seg_charset = NULL;
-	}
-    } else if (ct_info) {
-	if ((charset = ct_info->charset)) {
-	    if (charset->side == XlcGL)
-		state->GL_charset = charset;
-	    else if (charset->side == XlcGR)
-		state->GR_charset = charset;
-	}
+    /* Parse on copies; *ctext is only advanced for a valid sequence. */
+    type = _XlcParseCT(&tmp_ctext, &tmp_ctext_len, &final_byte);
+
+    switch (type) {
+	case XctGL94:
+	case XctGR94:
+	case XctGR96:
+	case XctGL94MB:
+	case XctGR94MB:
+	case XctOtherCoding:
+            *ctext = tmp_ctext;
+            *ctext_len = tmp_ctext_len;
+            break;
+        case XctReturn:
+            *ctext = tmp_ctext;
+            *ctext_len = tmp_ctext_len;
+            state->Other_charset = NULL;
+            return resOK;
+        case XctExtSeg:
+            if (tmp_ctext_len > 2	/* both length bytes must have bit 7 set */
+                && (tmp_ctext[0] & 0x80) && (tmp_ctext[1] & 0x80)) {
+                unsigned int msb = tmp_ctext[0] & 0x7f;
+                unsigned int lsb = tmp_ctext[1] & 0x7f;
+                ext_seg_left = (msb << 7) + lsb;
+                if (ext_seg_left <= tmp_ctext_len - 2) {
+                    *ctext = tmp_ctext + 2;
+                    *ctext_len = tmp_ctext_len - 2;
+                    break;
+                }
+            }
+            return resNotCTSeq;
+        default:
+            return resNotCTSeq;
     }
 
-    return 0;
+    ct_info = _XlcGetCTInfo(type, final_byte, *ctext, ext_seg_left);
+
+    if (ct_info) {
+        charset = ct_info->charset;
+        state->ext_seg_left = ext_seg_left;
+        if (type == XctExtSeg) {
+            state->charset = charset;
+            /* Skip past the extended segment name and the separator. */
+            *ctext += ct_info->ext_segment_len;
+            *ctext_len -= ct_info->ext_segment_len;
+            state->ext_seg_left -= ct_info->ext_segment_len;
+        } else if (type == XctOtherCoding) {
+            state->Other_charset = charset;
+        } else {
+            if (charset->side == XlcGL) {
+                state->GL_charset = charset;
+            } else if (charset->side == XlcGR) {
+                state->GR_charset = charset;
+            } else {
+                state->GL_charset = charset;
+                state->GR_charset = charset;
+            }
+        }
+        return resOK;
+    } else {
+        state->ext_seg_left = 0;
+        if (type == XctExtSeg) {
+            /* Skip the entire extended segment. */
+            *ctext += ext_seg_left;
+            *ctext_len -= ext_seg_left;
+        }
+        return resNotInList;
+    }
 }
 
-
 static void
-init_state(conv)
-    XlcConv conv;
+init_state(
+    XlcConv conv)	/* converter whose conv->state is reset to the initial state */
 {
     State state = (State) conv->state;
-    static XlcCharSet GL_charset = NULL;
-    static XlcCharSet GR_charset = NULL;
+    static XlcCharSet default_GL_charset = NULL; /* fetched on first use, see below */
+    static XlcCharSet default_GR_charset = NULL; /* fetched together with the GL one */
 
-    if (GL_charset == NULL) {
-	GL_charset = _XlcGetCharSet("ISO8859-1:GL");
-	GR_charset = _XlcGetCharSet("ISO8859-1:GR");
+    if (default_GL_charset == NULL) {
+	default_GL_charset = _XlcGetCharSet("ISO8859-1:GL");
+	default_GR_charset = _XlcGetCharSet("ISO8859-1:GR");
     }
 
-    state->GL_charset = state->charset = GL_charset;
-    state->GR_charset = GR_charset;
-    state->ext_seg_charset = NULL;
+    /* The initial state is ISO-8859-1 on both sides. */
+    state->GL_charset = state->charset = default_GL_charset;
+    state->GR_charset = default_GR_charset;
+
+    state->Other_charset = NULL;	/* no XctOtherCoding charset selected */
+
     state->ext_seg_left = 0;
 }
 
+/* from XlcNCompoundText to XlcNCharSet: converts one run of bytes that all belong
+   to one charset, stores that charset in args[0], returns the skipped-sequence count. */
 static int
-cttocs(conv, from, from_left, to, to_left, args, num_args)
-    XlcConv conv;
-    XPointer *from;
-    int *from_left;
-    XPointer *to;
-    int *to_left;
-    XPointer *args;
-    int num_args;
+cttocs(
+    XlcConv conv,	/* converter; conv->state carries the CT parsing state */
+    XPointer *from,	/* in/out: compound text input pointer */
+    int *from_left,	/* in/out: input bytes remaining */
+    XPointer *to,	/* in/out: output buffer pointer */
+    int *to_left,	/* in/out: output bytes remaining */
+    XPointer *args,	/* args[0], if present, receives the XlcCharSet of the run */
+    int num_args)
 {
-    register State state = (State) conv->state;
-    register unsigned char ch;
-    int length;
+    State state = (State) conv->state;
     XlcCharSet charset = NULL;
-    char *ctptr, *bufptr;
+    const char *ctptr;
+    char *bufptr;
     int ctext_len, buf_len;
+    int unconv_num = 0;	/* sequences that were skipped instead of converted */
 
-    ctptr = *((char **) from);
-    bufptr = *((char **) to);
+    ctptr = (const char *) *from;
+    bufptr = (char *) *to;
     ctext_len = *from_left;
     buf_len = *to_left;
 
     while (ctext_len > 0 && buf_len > 0) {
-	if (state->ext_seg_left > 0) {
-	    length = min(state->ext_seg_left, ctext_len);
-	    length = min(length, buf_len);
-
-	    ctext_len -= length;
-	    state->ext_seg_left -= length;
-
-	    if (state->ext_seg_charset) {
-		charset = state->ext_seg_charset;
-		buf_len -= length;
-		if (charset->side == XlcGL) {
-		    while (length-- > 0)
-			*bufptr++ = *ctptr++ & 0x7f;
-		} else if (charset->side == XlcGR) {
-		    while (length-- > 0)
-			*bufptr++ = *ctptr++ | 0x80;
-		} else {
-		    while (length-- > 0)
-			*bufptr++ = *ctptr++;
-		}
-
-		if (state->ext_seg_left < 1)
-		    state->ext_seg_charset = NULL;
-	    }
-	    break;
-	}
-	ch = *((unsigned char *) ctptr);
-	if (ch == 0x1b || ch == 0x9b) {
-	    length = _XlcCheckCTSequence(state, &ctptr, &ctext_len);
-	    if (length < 0)
-		return -1;
-	    if (state->ext_seg_left > 0 && charset)
-		break;
-	} else {
-	    if (charset) {
-		if (charset != (ch & 0x80 ? state->GR_charset :
-				state->GL_charset))
-		    break;
-	    } else
-		charset = ch & 0x80 ? state->GR_charset : state->GL_charset;
-
-	    if ((ch < 0x20 && ch != '\0' && ch != '\n' && ch != '\t') ||
-		    (ch >= 0x80 && ch < 0xa0))
-		return -1;
-
-	    *bufptr++ = *ctptr++;
-	    ctext_len--;
-	    buf_len--;
-	}
+        if (state->ext_seg_left == 0) {
+            /* Not in the middle of an extended segment; look at next byte. */
+            unsigned char ch = *ctptr;
+            XlcCharSet ch_charset;
+
+            if (ch == XctESC) {
+                CheckResult ret =
+                    _XlcCheckCTSequence(state, &ctptr, &ctext_len);
+                if (ret == resOK)
+                    /* state has been modified. */
+                    continue;
+                if (ret == resNotInList) {
+                    /* XXX Just continue with previous charset. */
+                    unconv_num++;
+                    continue;
+                }
+            } else if (ch == XctCSI) {
+                /* XXX Simply ignore the XctLeftToRight, XctRightToLeft,
+                   XctDirectionEnd sequences for the moment. */
+                unsigned char dummy;
+                if (_XlcParseCT(&ctptr, &ctext_len, &dummy)) {
+                    unconv_num++;
+                    continue;
+                }
+            }
+
+            /* Find the charset which is responsible for this byte. */
+            ch_charset = (state->Other_charset != NULL ? state->Other_charset :
+                          (ch & 0x80 ? state->GR_charset : state->GL_charset));
+
+            /* Set the charset of this run, or continue the current run,
+               or stop the current run. */
+            if (charset) {
+                if (charset != ch_charset)
+                    break;
+            } else {
+                state->charset = charset = ch_charset;
+            }
+
+            /* We don't want to split a character into multiple pieces. */
+            if (buf_len < 6) {
+                if (charset->char_size > 0) {
+                    if (buf_len < charset->char_size)
+                        break;
+                } else {
+                    /* char_size == 0 is tricky. The code here is good only
+                       for valid UTF-8 input. */
+                    if (charset->ct_sequence[0] == XctESC
+                        && charset->ct_sequence[1] == XctOtherCoding
+                        && charset->ct_sequence[2] == 'G') {
+                        int char_size = (ch < 0xc0 ? 1 :
+                                         ch < 0xe0 ? 2 :
+                                         ch < 0xf0 ? 3 :
+                                         ch < 0xf8 ? 4 :
+                                         ch < 0xfc ? 5 :
+                                                     6);
+                        if (buf_len < char_size)
+                            break;
+                    }
+                }
+            }
+
+            *bufptr++ = *ctptr++;
+            ctext_len--;
+            buf_len--;
+        } else {
+            /* Copy as much as possible from the current extended segment
+               to the buffer. */
+            int char_size;
+
+            /* Set the charset of this run, or continue the current run,
+               or stop the current run. */
+            if (charset) {
+                if (charset != state->charset)
+                    break;
+            } else {
+                charset = state->charset;
+            }
+
+            char_size = charset->char_size;
+
+            if (state->ext_seg_left <= buf_len || char_size > 0) {
+                int n = (state->ext_seg_left <= buf_len ? state->ext_seg_left
+                         : (buf_len / char_size) * char_size);
+                if (n == 0)	/* no room for one whole character: stop, don't spin */
+                    break;
+                memcpy(bufptr, ctptr, n);
+                ctptr += n; ctext_len -= n; bufptr += n; buf_len -= n;
+                state->ext_seg_left -= n;
+            } else {
+#if UTF8_IN_EXTSEQ
+                /* char_size == 0 is tricky. The code here is good only
+                   for valid UTF-8 input. */
+                if (strcmp(charset->name, "ISO10646-1") == 0) {
+                    unsigned char ch = *ctptr;
+                    int char_size = (ch < 0xc0 ? 1 :
+                                     ch < 0xe0 ? 2 :
+                                     ch < 0xf0 ? 3 :
+                                     ch < 0xf8 ? 4 :
+                                     ch < 0xfc ? 5 :
+                                                 6);
+                    int i;
+                    if (buf_len < char_size)
+                        break;
+                    /* A small loop is faster than calling memcpy. */
+                    for (i = char_size; i > 0; i--)
+                        *bufptr++ = *ctptr++;
+                    ctext_len -= char_size;
+                    buf_len -= char_size;
+                    state->ext_seg_left -= char_size;
+                } else
+#endif
+                {
+                    /* Here ctext_len >= state->ext_seg_left > buf_len.
+                       We may be splitting a character into multiple pieces.
+                       Oh well. */
+                    int n = buf_len;
+                    memcpy(bufptr, ctptr, n);
+                    ctptr += n; ctext_len -= n;
+                    bufptr += n; buf_len -= n;
+                    state->ext_seg_left -= n;
+                }
+            }
+        }
     }
 
-    if (charset)
-	state->charset = charset;
+    /* 'charset' is the charset for the current run. In some cases,
+       'state->charset' contains the charset for the next run. Therefore,
+       return 'charset'.
+       'charset' may still be NULL only if no output was produced. */
     if (num_args > 0)
-	*((XlcCharSet *) args[0]) = state->charset;
+	*((XlcCharSet *) args[0]) = charset;
 
-    *from_left -= ctptr - *((char **) from);
+    *from_left -= ctptr - *((const char **) from);
     *from = (XPointer) ctptr;
 
     *to_left -= bufptr - *((char **) to);
     *to = (XPointer) bufptr;
 
-    return 0;
+    return unconv_num;
 }
 
+/* from XlcNCharSet to XlcNCompoundText: emits the charset's control sequence when
+   it is not already active, then copies the text; args[0] is the source XlcCharSet. */
 static int
-cstoct(conv, from, from_left, to, to_left, args, num_args)
-    XlcConv conv;
-    XPointer *from;
-    int *from_left;
-    XPointer *to;
-    int *to_left;
-    XPointer *args;
-    int num_args;
+cstoct(
+    XlcConv conv,	/* converter; conv->state tracks the active GL/GR charsets */
+    XPointer *from,	/* in/out: charset-encoded input pointer */
+    int *from_left,	/* in/out: input bytes remaining */
+    XPointer *to,	/* in/out: compound text output pointer */
+    int *to_left,	/* in/out: output bytes remaining */
+    XPointer *args,	/* args[0] is the XlcCharSet of the input */
+    int num_args)
 {
     State state = (State) conv->state;
     XlcSide side;
-    unsigned char min_ch, max_ch;
-    register unsigned char ch;
-    int length;
+    unsigned char min_ch = 0, max_ch = 0; /* accepted byte range, set when set_size != 0 */
+    int length, unconv_num;
     CTInfo ct_info;
     XlcCharSet charset;
-    char *csptr, *ctptr;
+    const char *csptr;
+    char *ctptr;
     int csstr_len, ct_len;
+    char *ext_segment_start;	/* non-NULL only while an extended segment is being written */
+    int char_size;
 
+    /* One argument is required, of type XlcCharSet. */
     if (num_args < 1)
 	return -1;
-    
-    csptr = *((char **) from);
+
+    csptr = *((const char **) from);
     ctptr = *((char **) to);
     csstr_len = *from_left;
     ct_len = *to_left;
-    
+
     charset = (XlcCharSet) args[0];
 
     ct_info = _XlcGetCTInfoFromCharSet(charset);
@@ -637,74 +890,170 @@ cstoct(conv, from, from_left, to, to_left, args, num_args)
 	return -1;
 
     side = charset->side;
+    length = strlen(ct_info->ct_sequence);
+
+    ext_segment_start = NULL;
+
+    if (ct_info->type == XctOtherCoding) {
+        /* Output the Escape sequence for switching to the charset, and
+           reserve room now for the XctReturn sequence at the end. */
+        if (ct_len < length + 3)
+            return -1;
+
+        memcpy(ctptr, ct_info->ct_sequence, length);
+        ctptr += length;
+        ct_len -= length + 3;
+    } else
+    /* Test whether the charset is already active. */
+    if (((side == XlcGR || side == XlcGLGR)
+	 && charset != state->GR_charset)
+	|| ((side == XlcGL || side == XlcGLGR)
+	    && charset != state->GL_charset)) {
+
+        /* Output the Escape sequence for switching to the charset. */
+        if (ct_info->type == XctExtSeg) {
+            if (ct_len < length + 2 + ct_info->ext_segment_len)
+                return -1;
+
+            memcpy(ctptr, ct_info->ct_sequence, length);
+            ctptr += length;
+            ct_len -= length;
+
+            ctptr += 2;		/* leave two bytes for the length, backpatched at the end */
+            ct_len -= 2;
+            ext_segment_start = ctptr;
+
+            /* The size of an extended segment must fit in 14 bits. */
+            if (ct_len > 0x3fff)
+                ct_len = 0x3fff;
+
+            memcpy(ctptr, ct_info->ext_segment, ct_info->ext_segment_len);
+            ctptr += ct_info->ext_segment_len;
+            ct_len -= ct_info->ext_segment_len;
+        } else {
+            if (ct_len < length)
+                return -1;
+
+            memcpy(ctptr, ct_info->ct_sequence, length);
+            ctptr += length;
+            ct_len -= length;
+        }
+    }
 
-    if (ct_info->ext_segment) {
-	if (charset != state->ext_seg_charset && state->ext_seg_left < 1) {
-	    length = ct_info->encoding_len;
-	    if (ct_len < length)
-		return -1;
-	    strcpy(ctptr, ct_info->encoding);
-	    ctptr[4] = ((ct_info->ext_segment_len + csstr_len) / 128) | 0x80;
-	    ctptr[5] = ((ct_info->ext_segment_len + csstr_len) % 128) | 0x80;
-	    ctptr += length;
-	    ct_len -= length;
-	    state->ext_seg_left = csstr_len;
-	}
-	length = min(state->ext_seg_left, csstr_len);
-	state->ext_seg_left -= length;
-
-	if (side == XlcGL) {
-	    while (length-- > 0)
-		*ctptr++ = *csptr++ & 0x7f;
-	} else if (side == XlcGR) {
-	    while (length-- > 0)
-		*ctptr++ = *csptr++ | 0x80;
-	} else {
-	    while (length-- > 0)
-		*ctptr++ = *csptr++;
-	}
-	state->ext_seg_charset = (state->ext_seg_left > 0) ? charset : NULL;
-    } else {
-	if ((side == XlcGR && charset != state->GR_charset) ||
-	    (side == XlcGL && charset != state->GL_charset)) {
-
-	    ct_len -= ct_info->encoding_len;
-	    if (ct_len < 0)
-		return -1;
-	    strcpy(ctptr, ct_info->encoding);
-	    ctptr += ct_info->encoding_len;
-	}
-
-	min_ch = 0x20;
-	max_ch = 0x7f;
-
-	if (charset->set_size == 94) {
-	    max_ch--;
+    /* If the charset has side GL or GR, prepare remapping the characters
+       to the correct side. */
+    if (charset->set_size) {
+        min_ch = 0x20;
+        max_ch = 0x7f;
+        if (charset->set_size == 94) {
+            max_ch--;
 	    if (charset->char_size > 1 || side == XlcGR)
 		min_ch++;
-	}
+        }
+    }
 
+    /* Actually copy the contents. */
+    unconv_num = 0;	/* NOTE(review): counted below, but the function's final return is 0 */
+    char_size = charset->char_size;
+    if (char_size == 1) {
 	while (csstr_len > 0 && ct_len > 0) {
-	    ch = *((unsigned char *) csptr++) & 0x7f;
-	    if (ch < min_ch || ch > max_ch)
-		if (ch != 0x00 && ch != 0x09 && ch != 0x0a && ch != 0x1b)
-		    continue;	/* XXX */
+	    if (charset->set_size) {
+		/* The CompoundText specification says that the only
+		   control characters allowed are 0x09, 0x0a, 0x1b, 0x9b.
+		   Therefore here we eliminate other control characters. */
+		unsigned char ch = *((unsigned char *) csptr) & 0x7f;
+		if (!((ch >= min_ch && ch <= max_ch)
+		      || (side == XlcGL
+			  && (ch == 0x00 || ch == 0x09 || ch == 0x0a))
+		      || ((side == XlcGL || side == XlcGR)
+			  && (ch == 0x1b)))) {
+                    csptr++;	/* drop the disallowed byte from the input */
+                    csstr_len--;
+		    unconv_num++;
+                    continue;
+ 		}
+	    }
+
 	    if (side == XlcGL)
-		*ctptr++ = ch & 0x7f;
+		*ctptr++ = *csptr++ & 0x7f;
 	    else if (side == XlcGR)
-		*ctptr++ = ch | 0x80;
+		*ctptr++ = *csptr++ | 0x80;
 	    else
-		*ctptr++ = ch;
+		*ctptr++ = *csptr++;
 	    csstr_len--;
 	    ct_len--;
 	}
-	if (side == XlcGR)
-	    state->GR_charset = charset;
-	else if (side == XlcGL)
-	    state->GL_charset = charset;
+    } else if (char_size > 1) {
+	while (csstr_len >= char_size && ct_len >= char_size) { /* whole characters only */
+	    if (side == XlcGL) {
+		int i;
+		for (i = char_size; i > 0; i--)
+		    *ctptr++ = *csptr++ & 0x7f;
+	    } else if (side == XlcGR) {
+		int i;
+		for (i = char_size; i > 0; i--)
+		    *ctptr++ = *csptr++ | 0x80;
+	    } else {
+		int i;
+		for (i = char_size; i > 0; i--)
+		    *ctptr++ = *csptr++;
+	    }
+	    csstr_len -= char_size;
+	    ct_len -= char_size;
+	}
+    } else {
+        /* char_size = 0. The code here is good only for valid UTF-8 input. */
+        if ((charset->ct_sequence[0] == XctESC
+             && charset->ct_sequence[1] == XctOtherCoding
+             && charset->ct_sequence[2] == 'G')
+#if UTF8_IN_EXTSEQ
+            || strcmp(charset->name, "ISO10646-1") == 0
+#endif
+           ) {
+            while (csstr_len > 0 && ct_len > 0) {
+                unsigned char ch = * (unsigned char *) csptr;
+                int char_size = (ch < 0xc0 ? 1 :	/* shadows the outer char_size */
+                                 ch < 0xe0 ? 2 :
+                                 ch < 0xf0 ? 3 :
+                                 ch < 0xf8 ? 4 :
+                                 ch < 0xfc ? 5 :
+                                             6);
+                int i;
+                if (!(csstr_len >= char_size && ct_len >= char_size))
+                    break;
+                for (i = char_size; i > 0; i--)
+                    *ctptr++ = *csptr++;
+                csstr_len -= char_size;
+                ct_len -= char_size;
+            }
+        } else {
+            while (csstr_len > 0 && ct_len > 0) {
+                *ctptr++ = *csptr++;
+                csstr_len--;
+                ct_len--;
+            }
+        }
     }
 
-    *from_left -= csptr - *((char **) from);
+    if (ct_info->type == XctOtherCoding) {
+        /* Terminate with an XctReturn sequence. */
+        ctptr[0] = XctESC;	/* the 3 bytes written here were reserved from ct_len above */
+        ctptr[1] = XctOtherCoding;
+        ctptr[2] = '@';
+        ctptr += 3;
+    } else if (ext_segment_start != NULL) {
+        /* Backpatch the extended segment's length. */
+        int ext_segment_length = ctptr - ext_segment_start;
+        *(ext_segment_start - 2) = (ext_segment_length >> 7) | 0x80;
+        *(ext_segment_start - 1) = (ext_segment_length & 0x7f) | 0x80;
+    } else {
+        if (side == XlcGR || side == XlcGLGR)
+            state->GR_charset = charset;
+        if (side == XlcGL || side == XlcGLGR)
+            state->GL_charset = charset;
+    }
+
+    *from_left -= csptr - *((const char **) from);
     *from = (XPointer) csptr;
 
     *to_left -= ctptr - *((char **) to);
@@ -713,22 +1062,25 @@ cstoct(conv, from, from_left, to, to_left, args, num_args)
     return 0;
 }
 
+/* from XlcNString to XlcNCharSet: copies a run of bytes whose high bit equals 'side'. */
+/* NOTE(review): the copy loop reads *src before testing length-- (unchanged context line). */
 static int
-strtocs(conv, from, from_left, to, to_left, args, num_args)
-    XlcConv conv;
-    XPointer *from;
-    int *from_left;
-    XPointer *to;
-    int *to_left;
-    XPointer *args;
-    int num_args;
+strtocs(
+    XlcConv conv,	/* converter; its state supplies the GL/GR charsets reported below */
+    XPointer *from,	/* in/out: input pointer */
+    int *from_left,	/* in/out: input bytes remaining */
+    XPointer *to,	/* in/out: output pointer */
+    int *to_left,	/* in/out: output bytes remaining */
+    XPointer *args,	/* args[0], if present, receives the run's XlcCharSet */
+    int num_args)
 {
     State state = (State) conv->state;
-    register char *src, *dst;
+    const char *src;
+    char *dst;
     unsigned char side;
-    register int length;
+    int length;	/* upper bound on bytes copied: min of input and output room */
 
-    src = (char *) *from;
+    src = (const char *) *from;
     dst = (char *) *to;
 
     length = min(*from_left, *to_left);
@@ -737,46 +1089,54 @@ strtocs(conv, from, from_left, to, to_left, args, num_args)
     while (side == (*((unsigned char *) src) & 0x80) && length-- > 0)
 	*dst++ = *src++;
     
-    *from_left -= src - (char *) *from;
+    *from_left -= src - (const char *) *from;
     *from = (XPointer) src;
     *to_left -= dst - (char *) *to;
     *to = (XPointer) dst;
 
     if (num_args > 0)
-	*((XlcCharSet *)args[0]) = side ? state->GR_charset : state->GL_charset;
+	*((XlcCharSet *)args[0]) = (side ? state->GR_charset : state->GL_charset); /* high bit set -> GR */
 
     return 0;
 }
 
+/* from XlcNCharSet to XlcNString: drops bytes not allowed in STRING and counts them. */
+/* Returns -1 unless args[0] is the state's GL or GR charset. */
 static int
-cstostr(conv, from, from_left, to, to_left, args, num_args)
-    XlcConv conv;
-    XPointer *from;
-    int *from_left;
-    XPointer *to;
-    int *to_left;
-    XPointer *args;
-    int num_args;
+cstostr(
+    XlcConv conv,	/* converter; its state holds the accepted GL/GR charsets */
+    XPointer *from,	/* in/out: input pointer */
+    int *from_left,	/* in/out: input bytes remaining */
+    XPointer *to,	/* in/out: output pointer */
+    int *to_left,	/* in/out: output bytes remaining */
+    XPointer *args,	/* args[0] is the XlcCharSet of the input */
+    int num_args)
 {
     State state = (State) conv->state;
-    char *csptr, *string_ptr;
+    const char *csptr;
+    char *string_ptr;
     int csstr_len, str_len;
     unsigned char ch;
     int unconv_num = 0;
 
-    if (num_args < 1 || (state->GL_charset != (XlcCharSet) args[0] &&
-	state->GR_charset != (XlcCharSet) args[0]))
+    /* This converter can only convert from ISO8859-1:GL and ISO8859-1:GR. */
+    if (num_args < 1
+	|| !((XlcCharSet) args[0] == state->GL_charset
+	     || (XlcCharSet) args[0] == state->GR_charset))
 	return -1;
     
-    csptr = *((char **) from);
+    csptr = *((const char **) from);
     string_ptr = *((char **) to);
     csstr_len = *from_left;
     str_len = *to_left;
 
-    while (csstr_len-- > 0 && str_len > 0) {
+    while (csstr_len > 0 && str_len > 0) {
 	ch = *((unsigned char *) csptr++);
-	if ((ch < 0x20 && ch != 0x00 && ch != 0x09 && ch != 0x0a) ||
-	    ch == 0x7f || ((ch & 0x80) && ch < 0xa0)) {
+	csstr_len--;	/* decremented only for bytes actually consumed */
+	/* Citing ICCCM: "STRING as a type specifies the ISO Latin-1 character
+	   set plus the control characters TAB and NEWLINE." */
+	if ((ch < 0x20 && ch != 0x00 && ch != 0x09 && ch != 0x0a)
+	    || (ch >= 0x7f && ch < 0xa0)) {
 	    unconv_num++;
 	    continue;
 	}
@@ -784,7 +1144,7 @@ cstostr(conv, from, from_left, to, to_left, args, num_args)
 	str_len--;
     }
 
-    *from_left -= csptr - *((char **) from);
+    *from_left -= csptr - *((const char **) from); /* bytes consumed, including dropped ones */
     *from = (XPointer) csptr;
 
     *to_left -= string_ptr - *((char **) to);
@@ -794,116 +1154,141 @@ cstostr(conv, from, from_left, to, to_left, args, num_args)
 }
 
 
-static void
-close_converter(conv)
-    XlcConv conv;
-{
-    if (conv->state)
-	Xfree((char *) conv->state);
-
-    Xfree((char *) conv);
-}
-
 static XlcConv
-create_conv(methods)
-    XlcConvMethods methods;
+create_conv(
+    XlcConvMethods methods)	/* method table stored in the new converter */
 {
-    register XlcConv conv;
+    XlcConv conv;
 
-    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec));
+    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + sizeof(StateRec)); /* one block for both */
     if (conv == NULL)
 	return (XlcConv) NULL;
 
-    conv->state = (XPointer) Xmalloc(sizeof(StateRec));
-    if (conv->state == NULL)
-	goto err;
-    
+    conv->state = (XPointer) &conv[1];	/* the StateRec sits right after the XlcConvRec */
+
     conv->methods = methods;
 
     init_state(conv);
 
     return conv;
+}
 
-err:
-    close_converter(conv);
-
-    return (XlcConv) NULL;
+static void
+close_converter(
+    XlcConv conv)	/* converter obtained from create_conv */
+{
+    /* conv->state is allocated together with conv, free both at once.  */
+    Xfree((char *) conv);
 }
 
+/* CompoundText -> CharSet: method table (close_converter, cttocs, init_state) and opener. */
 static XlcConvMethodsRec cttocs_methods = {
     close_converter,
     cttocs,
     init_state
-} ;
+};
 
 static XlcConv
-open_cttocs(from_lcd, from_type, to_lcd, to_type)
-    XLCd from_lcd;
-    char *from_type;
-    XLCd to_lcd;
-    char *to_type;
+open_cttocs(
+    XLCd from_lcd,		/* not used by this opener */
+    const char *from_type,	/* not used by this opener */
+    XLCd to_lcd,		/* not used by this opener */
+    const char *to_type)	/* not used by this opener */
 {
     return create_conv(&cttocs_methods);
 }
 
+/* CharSet -> CompoundText: method table (close_converter, cstoct, init_state) and opener. */
 static XlcConvMethodsRec cstoct_methods = {
     close_converter,
     cstoct,
     init_state
-} ;
+};
 
 static XlcConv
-open_cstoct(from_lcd, from_type, to_lcd, to_type)
-    XLCd from_lcd;
-    char *from_type;
-    XLCd to_lcd;
-    char *to_type;
+open_cstoct(
+    XLCd from_lcd,		/* not used by this opener */
+    const char *from_type,	/* not used by this opener */
+    XLCd to_lcd,		/* not used by this opener */
+    const char *to_type)	/* not used by this opener */
 {
     return create_conv(&cstoct_methods);
 }
 
+/* String -> CharSet: method table (close_converter, strtocs, init_state) and opener. */
 static XlcConvMethodsRec strtocs_methods = {
     close_converter,
     strtocs,
     init_state
-} ;
+};
 
 static XlcConv
-open_strtocs(from_lcd, from_type, to_lcd, to_type)
-    XLCd from_lcd;
-    char *from_type;
-    XLCd to_lcd;
-    char *to_type;
+open_strtocs(
+    XLCd from_lcd,		/* not used by this opener */
+    const char *from_type,	/* not used by this opener */
+    XLCd to_lcd,		/* not used by this opener */
+    const char *to_type)	/* not used by this opener */
 {
     return create_conv(&strtocs_methods);
 }
 
+/* CharSet -> String: method table (close_converter, cstostr, init_state) and opener. */
 static XlcConvMethodsRec cstostr_methods = {
     close_converter,
     cstostr,
     init_state
-} ;
+};
 
 static XlcConv
-open_cstostr(from_lcd, from_type, to_lcd, to_type)
-    XLCd from_lcd;
-    char *from_type;
-    XLCd to_lcd;
-    char *to_type;
+open_cstostr(
+    XLCd from_lcd,		/* not used by this opener */
+    const char *from_type,	/* not used by this opener */
+    XLCd to_lcd,		/* not used by this opener */
+    const char *to_type)	/* not used by this opener */
 {
     return create_conv(&cstostr_methods);
 }
 
-static void
-init_converter()
+
+/* =========================== Initialization =========================== */
+/* Adds the built-in charsets and registers the four converters (once); returns True. */
+Bool
+_XlcInitCTInfo(void)
 {
-    _XlcSetConverter((XLCd) NULL, XlcNCompoundText, (XLCd) NULL, XlcNCharSet,
-		     open_cttocs);
-    _XlcSetConverter((XLCd) NULL, XlcNString, (XLCd) NULL, XlcNCharSet,
-		     open_strtocs);
-
-    _XlcSetConverter((XLCd) NULL, XlcNCharSet, (XLCd) NULL, XlcNCompoundText,
-		     open_cstoct);
-    _XlcSetConverter((XLCd) NULL, XlcNCharSet, (XLCd) NULL, XlcNString,
-		     open_cstostr);
+    if (ct_list == NULL) {	/* everything below is skipped once ct_list is set */
+        CTData ct_data;
+        int num;
+        XlcCharSet charset;
+
+        /* Initialize ct_list.  */
+
+        num = sizeof(default_ct_data) / sizeof(CTDataRec);
+        for (ct_data = default_ct_data; num > 0; ct_data++, num--) {
+            charset = _XlcAddCT(ct_data->name, ct_data->ct_sequence);
+            if (charset == NULL)
+                continue;	/* entry could not be added; keep going with the rest */
+            if (strncmp(charset->ct_sequence, "\x1b\x25\x2f", 3) != 0) /* not ESC % / */
+                charset->source = CSsrcStd;
+            else
+                charset->source = CSsrcXLC;
+        }
+
+        /* Register CompoundText and CharSet converters.  */
+
+        _XlcSetConverter((XLCd) NULL, XlcNCompoundText,
+                         (XLCd) NULL, XlcNCharSet,
+                         open_cttocs);
+        _XlcSetConverter((XLCd) NULL, XlcNString,
+                         (XLCd) NULL, XlcNCharSet,
+                         open_strtocs);
+
+        _XlcSetConverter((XLCd) NULL, XlcNCharSet,
+                         (XLCd) NULL, XlcNCompoundText,
+                         open_cstoct);
+        _XlcSetConverter((XLCd) NULL, XlcNCharSet,
+                         (XLCd) NULL, XlcNString,
+                         open_cstostr);
+    }
+
+    return True;
 }