summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Not Zed <NotZed@Ximian.com> 2003-01-13 05:46:35 +0000
committer Michael Zucci <zucchi@src.gnome.org> 2003-01-13 05:46:35 +0000
commit b877b3dc8f8abb83fc67c1899abcd8fd75a9d181 (patch)
tree dfb45e8fd7a780c1961950ddb6f2bfd80f80c3b0
parent 0f32fbf40a4f009ffbe908f6351f5ac9c2f862b4 (diff)
download evolution-data-server-b877b3dc8f8abb83fc67c1899abcd8fd75a9d181.tar.gz
Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning.
2003-01-13 Not Zed <NotZed@Ximian.com> * camel-mime-filter-tohtml.c (writeln): Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning. Also change the default logic slightly so that 8 bit or greater characters are properly converted to entities. * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8 char, bounded by an end pointer.
-rw-r--r-- camel/ChangeLog 10
-rw-r--r-- camel/camel-mime-filter-tohtml.c 38
-rw-r--r-- camel/camel-utf8.c 56
3 files changed, 87 insertions, 17 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog
index f9c2ce1c3..916e4b6f7 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,13 @@
+2003-01-13 Not Zed <NotZed@Ximian.com>
+
+ * camel-mime-filter-tohtml.c (writeln): Read the characters as
+ utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it
+ has no meaning. Also change the default logic slightly so that 8
+ bit or greater characters are properly converted to entities.
+
+ * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8
+ char, bounded by an end pointer.
+
2003-01-07 Dan Winship <danw@ximian.com>
* camel-provider.h (CamelProvider): add a "translation_domain"
diff --git a/camel/camel-mime-filter-tohtml.c b/camel/camel-mime-filter-tohtml.c
index 4f9d97262..370d9c6c4 100644
--- a/camel/camel-mime-filter-tohtml.c
+++ b/camel/camel-mime-filter-tohtml.c
@@ -28,6 +28,7 @@
#include <stdio.h>
#include <string.h>
+#include "camel-utf8.h"
#include "camel-url-scanner.h"
#include "camel-mime-filter-tohtml.h"
@@ -147,14 +148,18 @@ static char *
writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend)
{
CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
- register const char *inptr = in;
-
+ const char *inptr = in;
+
while (inptr < inend) {
- unsigned char u;
-
- outptr = check_size (filter, outptr, outend, 9);
-
- switch ((u = (unsigned char) *inptr++)) {
+ guint32 u;
+
+ outptr = check_size (filter, outptr, outend, 16);
+
+ u = camel_utf8_getc_limit(&inptr, inend);
+ switch (u) {
+ case 0xffff:
+ g_warning("Truncated utf8 buffer");
+ return outptr;
case '<':
outptr = g_stpcpy (outptr, "&lt;");
html->column++;
@@ -182,22 +187,21 @@ writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outpt
}
/* otherwise, FALL THROUGH */
case ' ':
- if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) {
- if (inptr == (in + 1) || *inptr == ' ' || *inptr == '\t') {
- outptr = g_stpcpy (outptr, "&nbsp;");
- html->column++;
- break;
- }
+ if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES
+ && ((inptr == (in + 1) || *inptr == ' ' || *inptr == '\t'))) {
+ outptr = g_stpcpy (outptr, "&nbsp;");
+ html->column++;
+ break;
}
/* otherwise, FALL THROUGH */
default:
- if (!(u >= 0x20 && u < 0x80) && !(html->flags & CAMEL_MIME_FILTER_TOHTML_PRESERVE_8BIT)) {
+ if (u >= 20 && u <0x80)
+ *outptr++ = u;
+ else {
if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
*outptr++ = '?';
else
- outptr += g_snprintf (outptr, 9, "&#%d;", (int) u);
- } else {
- *outptr++ = (char) u;
+ outptr += sprintf(outptr, "&#%u;", u);
}
html->column++;
break;
diff --git a/camel/camel-utf8.c b/camel/camel-utf8.c
index 5ed5a476d..3c7af65b4 100644
--- a/camel/camel-utf8.c
+++ b/camel/camel-utf8.c
@@ -83,6 +83,62 @@ loop:
return v;
}
+/**
+ * camel_utf8_getc_limit:
+ * @ptr: in/out read cursor; only written back when a char is returned.
+ * @end: first byte past the readable data; must not be NULL.
+ *
+ * Get the next utf8 char at @ptr, and return it, advancing @ptr to
+ * the next character. If @end is reached before a full utf8
+ * character can be read, then the invalid Unicode char 0xffff is
+ * returned as a sentinel (Unicode 3.1, section 2.7), and @ptr is not
+ * advanced.
+ *
+ * Return value: The next utf8 char, or 0xffff.
+ **/
+guint32
+camel_utf8_getc_limit(const unsigned char **ptr, const unsigned char *end)
+{
+ register unsigned char *p = (unsigned char *)*ptr; /* local cursor; *ptr is only stored on success */
+ register unsigned char c, r;
+ register guint32 v = 0xffff, m;
+
+again:
+ while (p < end) {
+ r = *p++; /* candidate lead byte */
+loop:
+ if (r < 0x80) { /* single-byte (7 bit) character */
+ *ptr = p;
+ return r;
+ } else if (r < 0xf8) { /* valid start char? (max 4 octets); NOTE(review): 0x80-0xbf also pass this test */
+ v = r;
+ m = 0x7f80; /* used to mask out the length bits */
+ do {
+ if (p >= end) /* input ends inside a sequence */
+ return 0xffff; /* *ptr is left where the caller had it */
+
+ c = *p++;
+ if ((c & 0xc0) != 0x80) { /* not a continuation byte */
+ r = c; /* drop the partial char and retry with c as lead byte */
+ goto loop;
+ }
+ v = (v<<6) | (c & 0x3f); /* append the 6 payload bits */
+ r<<=1; /* move the next length bit of the lead byte up to 0x40 */
+ m<<=5; /* v moves 6 per byte, m only 5, so m covers one more lead bit each round */
+ } while (r & 0x40); /* length bit still set: another byte is expected */
+
+ *ptr = p;
+
+ v &= ~m; /* strip the lead byte's length bits from the result */
+ return v;
+ } else { /* byte >= 0xf8: skip it and try the next one */
+ goto again;
+ }
+ }
+
+ return 0xffff; /* ran out of input without a complete character */
+}
+
void
g_string_append_u(GString *out, guint32 c)
{