summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sadrul Habib Chowdhury <sadrul@users.sourceforge.net> 2010-03-05 17:54:04 -0500
committer: Sadrul Habib Chowdhury <sadrul@users.sourceforge.net> 2010-03-05 17:54:04 -0500
commit: 47dd66ef7d204c70b67e8fa20f00ca9c25dde47b (patch)
tree: b325aeb14831de641964b2e97800b14ef7fc7a87
parent: 776d733821b3638cf0795d0fa08858c9c32802ca (diff)
download: screen-47dd66ef7d204c70b67e8fa20f00ca9c25dde47b.tar.gz
Fix encoding into 16+bit unicode.
This fix to 16+bit unicode encoding should fix hardcopy/log issues.
-rw-r--r-- src/encoding.c 109
-rw-r--r-- src/mark.c 2
2 files changed, 67 insertions, 44 deletions
diff --git a/src/encoding.c b/src/encoding.c
index be3ae62..c8b3d1a 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -668,51 +668,72 @@ static Tab tab[] =
0, /* end of table */
};
+#define FOR_EACH_BYTE_IN_UTF8(c, bytefn, combfn) do \
+ { \
+ int byte = 0; \
+ if (c >= 0xe000) \
+ { \
+ int _tm = 0; \
+ Tab *_t; \
+ for (_t = tab; _t->cmask; _t++) \
+ { \
+ if (c <= _t->lmask) \
+ { \
+ _tm = _t->shift; \
+ byte = _t->cval | (c>>_tm); \
+ bytefn \
+ while (_tm > 0) \
+ { \
+ _tm -= 6; \
+ byte = 0x80 | ((c>>_tm) & 0x3F); \
+ bytefn \
+ } \
+ break; \
+ } \
+ } \
+ break; \
+ } \
+ \
+ if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800]) \
+ { \
+ combfn \
+ } \
+ if (c >= 0x800) \
+ { \
+ byte = (c & 0xf000) >> 12 | 0xe0; \
+ bytefn \
+ c = (c & 0x0fff) | 0x1000; \
+ } \
+ if (c >= 0x80) \
+ { \
+ byte = (c & 0x1fc0) >> 6 ^ 0xc0; \
+ bytefn \
+ c = (c & 0x3f) | 0x80; \
+ } \
+ byte = c; \
+ bytefn \
+ } while (0)
void
AddUtf8(c)
int c;
{
ASSERT(D_encoding == UTF8);
- if (c >= 0xe000)
- {
- int l = 0;
- Tab *t;
- for (t = tab; t->cmask; t++)
- {
- if (c <= t->lmask)
- {
- l = t->shift;
- AddChar(t->cval | (c>>l));
- while (l > 0)
- {
- l -= 6;
- AddChar(0x80 | ((c>>l) & 0x3F));
- }
- break;
- }
- }
- return;
- }
- if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
+ FOR_EACH_BYTE_IN_UTF8(c,
+ {
+ AddChar(byte);
+ },
{
AddUtf8(combchars[c - 0xd800]->c1);
c = combchars[c - 0xd800]->c2;
}
- if (c >= 0x800)
- {
- AddChar((c & 0xf000) >> 12 | 0xe0);
- c = (c & 0x0fff) | 0x1000;
- }
- if (c >= 0x80)
- {
- AddChar((c & 0x1fc0) >> 6 ^ 0xc0);
- c = (c & 0x3f) | 0x80;
- }
- AddChar(c);
+ );
}
+#if 0
+/* It feels like a good idea to simply use one ToUtf8, instead of having both
+ * ToUtf8_comb and ToUtf8. */
int
ToUtf8_comb(p, c)
char *p;
@@ -727,29 +748,27 @@ int c;
}
return ToUtf8(p, c);
}
+#endif
int
ToUtf8(p, c)
char *p;
int c;
{
- int l = 1;
- if (c >= 0x800)
+ int l = 0;
+ FOR_EACH_BYTE_IN_UTF8(c,
{
if (p)
- *p++ = (c & 0xf000) >> 12 | 0xe0;
+ *p++ = byte;
l++;
- c = (c & 0x0fff) | 0x1000;
- }
- if (c >= 0x80)
+ },
{
+ l += ToUtf8(p, combchars[c - 0xd800]->c1);
+ c = combchars[c - 0xd800]->c2;
if (p)
- *p++ = (c & 0x1fc0) >> 6 ^ 0xc0;
- l++;
- c = (c & 0x3f) | 0x80;
+ p += l;
}
- if (p)
- *p++ = c;
+ );
return l;
}
@@ -1399,6 +1418,9 @@ int *fontp;
#ifdef UTF8
if (encoding == UTF8)
{
+#if 0
+ /* We didn't use to handle 16+bit unicode correctly. But since now we do (in ToUtf8),
+ * do we need this? */
if (f)
{
# ifdef DW_CHARS
@@ -1415,6 +1437,7 @@ int *fontp;
c = recode_char_to_encoding(c, encoding);
}
}
+#endif
return ToUtf8(bp, c);
}
if ((c & 0xff00) && f == 0) /* is_utf8? */
diff --git a/src/mark.c b/src/mark.c
index 4a3da83..c4edd65 100644
--- a/src/mark.c
+++ b/src/mark.c
@@ -344,7 +344,7 @@ char *pt;
c |= cf << 8;
if (c == UCS_HIDDEN)
continue;
- c = ToUtf8_comb(pt, c);
+ c = ToUtf8(pt, c);
l += c;
if (pt)
pt += c;