summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeffrey Stedfast <fejj@ximian.com>2004-02-14 01:49:56 +0000
committerJeffrey Stedfast <fejj@src.gnome.org>2004-02-14 01:49:56 +0000
commitff0b0440dc0f21b1a6b51385fd990b4ff4862691 (patch)
tree4f1e55ff5f0874d3ba5eb738631f010aae0f4ca8
parenta8a536c9862522c7d4737b978073b1c96458afb5 (diff)
downloadgmime-ff0b0440dc0f21b1a6b51385fd990b4ff4862691.tar.gz
Pop our end boundary before scanning postface text data. Prevents a hang if a multipart brokenly has 2 end boundaries.
2004-02-13 Jeffrey Stedfast <fejj@ximian.com> * gmime/gmime-parser.c (parser_construct_multipart): Pop our end boundary before scanning postface text data. Prevents a hang if a multipart brokenly has 2 end boundaries. 2004-02-05 Jeffrey Stedfast <fejj@ximian.com> * gmime/url-scanner.c: Added single/double quotes to url_braces[] in case the user is quoting the url. (g_url_web_end): Add "-;:" to list of punctuation to strip off the end of urls. Also fixed to handle user@domain's. (g_url_addrspec_start): Strip open brace characters from the beginning of the addr. (g_url_web_start): Make sure "www" wasn't part of something not a url (like "Ewww.Gross") by checking that pos[-1] is either an open brace or whitespace. (g_url_addrspec_end): Don't allow toplevel domain addr-specs (if we encounter something that looks like it is a toplevel domain addr, it is more likely to be bogus than correct).
-rw-r--r--ChangeLog21
-rw-r--r--gmime/gmime-parser.c5
-rw-r--r--gmime/url-scanner.c108
3 files changed, 103 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 752ee449..9cd0a7fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2004-02-13 Jeffrey Stedfast <fejj@ximian.com>
+
+ * gmime/gmime-parser.c (parser_construct_multipart): Pop our end
+ boundary before scanning postface text data. Prevents a hang if a
+ multipart brokenly has 2 end boundaries.
+
+2004-02-05 Jeffrey Stedfast <fejj@ximian.com>
+
+ * gmime/url-scanner.c: Added single/double quotes to url_braces[]
+ in case the user is quoting the url.
+ (g_url_web_end): Add "-;:" to list of punctuation to strip off the
+ end of urls. Also fixed to handle user@domain's
+ (g_url_addrspec_start): Strip open brace characters from the
+ beginning of the addr.
+ (g_url_web_start): Make sure "www" wasn't part of something not a
+ url (like "Ewww.Gross") by checking that pos[-1] is either an open
+ brace or whitespace.
+ (g_url_addrspec_end): Don't allow toplevel domain addr-specs
+ (if we encounter something that looks like it is a toplevel domain
+ addr, it is more likely to be bogus than correct).
+
2004-01-25 Jeffrey Stedfast <fejj@ximian.com>
* gmime/gmime-param.c: Disable debugging and conditionally disable
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index bd593217..791f0058 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -1230,10 +1230,11 @@ parser_construct_multipart (GMimeParser *parser, GMimeContentType *content_type,
if (*found == FOUND_END_BOUNDARY && found_immediate_boundary (priv)) {
/* eat end boundary */
parser_skip_line (parser);
+ parser_pop_boundary (parser);
*found = parser_scan_multipart_postface (parser, multipart);
+ } else {
+ parser_pop_boundary (parser);
}
-
- parser_pop_boundary (parser);
} else {
w(g_warning ("multipart without boundary encountered"));
/* this will scan everything into the preface */
diff --git a/gmime/url-scanner.c b/gmime/url-scanner.c
index dacc9770..fa69e735 100644
--- a/gmime/url-scanner.c
+++ b/gmime/url-scanner.c
@@ -139,6 +139,45 @@ enum {
#define is_domain(x) ((url_scanner_table[(unsigned char)(x)] & IS_DOMAIN) != 0)
#define is_urlsafe(x) ((url_scanner_table[(unsigned char)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0)
+static struct { /* pairs of delimiters that may enclose a url or address */
+ char open; /* character that opens the enclosure */
+ char close; /* character that closes an enclosure started by 'open' */
+} url_braces[] = {
+ { '(', ')' },
+ { '{', '}' },
+ { '[', ']' },
+ { '<', '>' },
+ { '|', '|' }, /* the same character both opens and closes */
+};
+
+/**
+ * is_open_brace:
+ * @c: character to classify
+ *
+ * Scans every entry of url_braces[] looking for one whose opening
+ * delimiter equals @c.
+ *
+ * Returns: TRUE if @c is an opening delimiter, FALSE otherwise.
+ **/
+static gboolean
+is_open_brace (char c)
+{
+	gboolean matched = FALSE;
+	int idx = 0;
+
+	/* stop as soon as one entry matches or the table is exhausted */
+	while (!matched && idx < G_N_ELEMENTS (url_braces)) {
+		if (url_braces[idx].open == c)
+			matched = TRUE;
+		idx++;
+	}
+
+	return matched;
+}
+
+/**
+ * url_stop_at_brace:
+ * @in: buffer being scanned
+ * @so: offset into @in; the character at in[so - 1] is inspected
+ *
+ * If the character immediately preceding offset @so is one of the
+ * opening delimiters listed in url_braces[], returns the closing
+ * delimiter paired with it.
+ *
+ * Returns: the paired closing delimiter, or '\0' when @so is 0 or the
+ * preceding character is not an opening delimiter.
+ **/
+static char
+url_stop_at_brace (const char *in, size_t so)
+{
+	size_t i;
+
+	/* nothing precedes offset 0, so there is no brace to look at */
+	if (so == 0)
+		return '\0';
+
+	/* walk the whole table rather than a hard-coded count so that
+	 * entries added to url_braces[] are honoured here as well */
+	for (i = 0; i < G_N_ELEMENTS (url_braces); i++) {
+		if (in[so - 1] == url_braces[i].open)
+			return url_braces[i].close;
+	}
+
+	return '\0';
+}
+
gboolean
g_url_addrspec_start (const char *in, const char *pos, const char *inend, urlmatch_t *match)
@@ -162,7 +201,7 @@ g_url_addrspec_start (const char *in, const char *pos, const char *inend, urlmat
inptr--;
}
- if (!is_atom (*inptr))
+ if (!is_atom (*inptr) || is_open_brace (*inptr))
inptr++;
if (inptr == pos)
@@ -178,6 +217,7 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
{
const char *inptr = pos;
int parts = 0, digits;
+ gboolean got_dot = FALSE;
g_assert (*inptr == '@');
@@ -204,6 +244,8 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
inptr++;
else
return FALSE;
+
+ got_dot = TRUE;
} else {
while (inptr < inend) {
if (is_domain (*inptr))
@@ -214,12 +256,15 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
while (inptr < inend && is_domain (*inptr))
inptr++;
- if (inptr < inend && *inptr == '.' && is_domain (inptr[1]))
+ if (inptr < inend && *inptr == '.' && is_domain (inptr[1])) {
+ if (*inptr == '.')
+ got_dot = TRUE;
inptr++;
+ }
}
}
- if (inptr == pos + 1)
+ if (inptr == pos + 1 || !got_dot)
return FALSE;
match->um_eo = (inptr - in);
@@ -228,32 +273,6 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
}
-static struct {
- char open;
- char close;
-} url_braces[] = {
- { '(', ')' },
- { '{', '}' },
- { '[', ']' },
- { '<', '>' },
-};
-
-static char
-url_stop_at_brace (const char *in, size_t so)
-{
- int i;
-
- if (so > 0) {
- for (i = 0; i < 4; i++) {
- if (in[so - 1] == url_braces[i].open)
- return url_braces[i].close;
- }
- }
-
- return '\0';
-}
-
-
gboolean
g_url_file_start (const char *in, const char *pos, const char *inend, urlmatch_t *match)
{
@@ -323,7 +342,31 @@ g_url_web_end (const char *in, const char *pos, const char *inend, urlmatch_t *m
inptr++;
} while (parts < 4);
+ } else if (is_atom (*inptr)) {
+ /* might be a domain or user@domain */
+ const char *save = inptr;
+
+ while (inptr < inend) {
+ if (!is_atom (*inptr))
+ break;
+
+ inptr++;
+
+ while (inptr < inend && is_atom (*inptr))
+ inptr++;
+
+ if (inptr < inend && *inptr == '.' && is_atom (inptr[1]))
+ inptr++;
+ }
+
+ if (*inptr != '@')
+ inptr = save;
+ else
+ inptr++;
+
+ goto domain;
} else if (is_domain (*inptr)) {
+ domain:
while (inptr < inend) {
if (is_domain (*inptr))
inptr++;
@@ -368,6 +411,13 @@ g_url_web_end (const char *in, const char *pos, const char *inend, urlmatch_t *m
}
}
+ /* urls are extremely unlikely to end with any
+ * punctuation, so strip any trailing
+ * punctuation off. Also strip off any closing
+ * braces or quotes. */
+ while (inptr > pos && strchr (",.:;?!-|)}]'\"", inptr[-1]))
+ inptr--;
+
match->um_eo = (inptr - in);
return TRUE;