summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jeffrey Stedfast <fejj@ximian.com> 2003-02-13 03:59:52 +0000
committer Jeffrey Stedfast <fejj@src.gnome.org> 2003-02-13 03:59:52 +0000
commit e25854da53b55dfbcb3b382f599de0446a580c64 (patch)
tree f94b92618175781bbf8b40b47e44cd2d6ad3bd2d
parent 7806a58579faff5071a3707b65d747f3ffe8790a (diff)
download gmime-e25854da53b55dfbcb3b382f599de0446a580c64.tar.gz
Fixed the table to treat >=127 as a CTRL character.
2003-02-12 Jeffrey Stedfast <fejj@ximian.com>

* gmime/url-scanner.c: Fixed the table to treat >=127 as a CTRL character.

* gmime/gtrie.c (trie_utf8_getc): When we encounter an invalid UTF-8 sequence, update in to point to in+1 and return 0xfffe.
(g_trie_add): Handle invalid UTF-8 sequences (i.e., c == 0xfffe).
(g_trie_search): Same.

* gmime/gmime-filter-html.c (html_utf8_getc): Shortcut if inptr == inend by returning 0 and treating it as if we found the terminating nul-char.
-rw-r--r-- ChangeLog 14
-rw-r--r-- gmime/gmime-filter-html.c 5
-rw-r--r-- gmime/gtrie.c 26
-rw-r--r-- gmime/url-scanner.c 19
4 files changed, 48 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 6f669857..17cb0d47 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2003-02-12 Jeffrey Stedfast <fejj@ximian.com>
+
+ * gmime/url-scanner.c: Fixed the table to treat >=127 as a CTRL
+ character.
+
+ * gmime/gtrie.c (trie_utf8_getc): When we encounter an invalid
+ UTF-8 sequence, update in to point to in+1 and return 0xfffe.
+ (g_trie_add): Handle invalid UTF-8 sequences (ie, c == 0xfffe).
+ (g_trie_search): Same.
+
+ * gmime/gmime-filter-html.c (html_utf8_getc): Shortcut if inptr ==
+ inend by returning 0 and treating it as if we found the
+ terminating nul-char.
+
2003-02-08 Jeffrey Stedfast <fejj@ximian.com>
* configure.in: Bumped the version to 2.0.1
diff --git a/gmime/gmime-filter-html.c b/gmime/gmime-filter-html.c
index 13e82608..85912d85 100644
--- a/gmime/gmime-filter-html.c
+++ b/gmime/gmime-filter-html.c
@@ -190,6 +190,9 @@ html_utf8_getc (const unsigned char **in, const unsigned char *inend)
register unsigned char c, r;
register gunichar u, m;
+ if (inptr == inend)
+ return 0;
+
while (inptr < inend) {
r = *inptr++;
loop:
@@ -239,7 +242,7 @@ writeln (GMimeFilter *filter, const unsigned char *in, const unsigned char *inen
u = html_utf8_getc (&inptr, inend);
switch (u) {
case 0xffff:
- g_warning ("truncated UTF-8 sequence encountered");
+ g_warning ("Invalid UTF-8 sequence encountered");
return outptr;
break;
case '<':
diff --git a/gmime/gtrie.c b/gmime/gtrie.c
index 643ac203..06645f4e 100644
--- a/gmime/gtrie.c
+++ b/gmime/gtrie.c
@@ -63,7 +63,7 @@ trie_utf8_getc (const unsigned char **in, size_t inlen)
register const unsigned char *inptr = *in;
const unsigned char *inend = inptr + inlen;
register unsigned char c, r;
- register gunichar u, m;
+ register gunichar m, u = 0;
if (inlen == 0)
return 0;
@@ -82,10 +82,8 @@ trie_utf8_getc (const unsigned char **in, size_t inlen)
return 0;
c = *inptr++;
- if ((c & 0xc0) != 0x80) {
- r = c;
- goto loop;
- }
+ if ((c & 0xc0) != 0x80)
+ goto error;
u = (u << 6) | (c & 0x3f);
r <<= 1;
@@ -96,7 +94,9 @@ trie_utf8_getc (const unsigned char **in, size_t inlen)
u &= ~m;
} else {
- goto again;
+ error:
+ *in = (*in) + 1;
+ u = 0xfffe;
}
return u;
@@ -226,6 +226,12 @@ g_trie_add (GTrie *trie, const char *pattern, int pattern_id)
q = &trie->root;
while ((c = trie_utf8_getc (&inptr, -1))) {
+ if (c == 0xfffe) {
+ g_warning ("Invalid UTF-8 sequence in pattern '%s' at %s",
+ pattern, (inptr - 1));
+ continue;
+ }
+
if (trie->icase)
c = g_unichar_tolower (c);
@@ -314,6 +320,14 @@ g_trie_search (GTrie *trie, const char *buffer, size_t buflen, int *matched_id)
while ((c = trie_utf8_getc (&inptr, inlen))) {
inlen = (inend - inptr);
+ if (c == 0xfffe) {
+ prev = (inptr - 1);
+ pat = (const unsigned char *) buffer + buflen;
+ g_warning ("Invalid UTF-8 in buffer '%.*s' at %.*s",
+ buflen, buffer, pat - prev, prev);
+ pat = prev = inptr;
+ }
+
if (trie->icase)
c = g_unichar_tolower (c);
diff --git a/gmime/url-scanner.c b/gmime/url-scanner.c
index a0438286..bb8fb10d 100644
--- a/gmime/url-scanner.c
+++ b/gmime/url-scanner.c
@@ -110,14 +110,14 @@ static unsigned char url_scanner_table[256] = {
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,128,
128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
enum {
@@ -375,9 +375,10 @@ url_scanner_table_init (void)
url_scanner_table[i] |= IS_DIGIT | IS_DOMAIN;
if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
url_scanner_table[i] |= IS_ALPHA | IS_DOMAIN;
+ if (i >= 127)
+ url_scanner_table[i] |= IS_CTRL;
}
- url_scanner_table[127] |= IS_CTRL;
url_scanner_table[' '] |= IS_SPACE;
url_scanner_table['-'] |= IS_DOMAIN;