Pop our end boundary before scanning postface text data. Prevents a hang if a multipart brokenly has 2 end boundaries.

2004-02-13 Jeffrey Stedfast <fejj@ximian.com> * gmime/gmime-parser.c (parser_construct_multipart): Pop our end boundary before scanning postface text data. Prevents a hang if a multipart brokenly has 2 end boundaries. 2004-02-05 Jeffrey Stedfast <fejj@ximian.com> * gmime/url-scanner.c: Added single/double quotes to url_braces[] in case the user is quoting the url. (g_url_web_end): Add "-;:" to list of punctuation to strip off the end of urls. Also fixed to handle user@domain's. (g_url_addrspec_start): Strip open brace characters from the beginning of the addr. (g_url_web_start): Make sure "www" wasn't part of something not a url (like "Ewww.Gross") by checking that pos[-1] is either an open brace or whitespace. (g_url_addrspec_end): Don't allow toplevel domain addr-specs (if we encounter something that looks like it is a toplevel domain addr, it is more likely to be bogus than correct).
author: Jeffrey Stedfast <fejj@ximian.com> 2004-02-14 01:49:56 +0000
committer: Jeffrey Stedfast <fejj@src.gnome.org> 2004-02-14 01:49:56 +0000
commit: ff0b0440dc0f21b1a6b51385fd990b4ff4862691 (patch)
tree: 4f1e55ff5f0874d3ba5eb738631f010aae0f4ca8
parent: a8a536c9862522c7d4737b978073b1c96458afb5 (diff)
download: gmime-ff0b0440dc0f21b1a6b51385fd990b4ff4862691.tar.gz
3 files changed, 103 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 752ee449..9cd0a7fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2004-02-13  Jeffrey Stedfast  <fejj@ximian.com>
+
+	* gmime/gmime-parser.c (parser_construct_multipart): Pop our end
+	boundary before scanning postface text data. Prevents a hang if a
+	multipart brokenly has 2 end boundaries.
+
+2004-02-05  Jeffrey Stedfast  <fejj@ximian.com>
+
+	* gmime/url-scanner.c: Added single/double quotes to url_braces[]
+	in case the user is quoting the url.
+	(g_url_web_end): Add "-;:" to list of punctuation to strip off the
+	end of urls. Also fixed to handle user@domain's
+	(g_url_addrspec_start): Strip open brace characters from the
+	beginning of the addr.
+	(g_url_web_start): Make sure "www" wasn't part of something not a
+	url (like "Ewww.Gross") by checking that pos[-1] is either an open
+	brace or whitespace.
+	(g_url_addrspec_end): Don't allow toplevel domain addr-specs
+	(if we encounter something that looks like it is a toplevel domain
+	addr, it is more likely to be bogus than correct).
+
 2004-01-25  Jeffrey Stedfast  <fejj@ximian.com>
 
 	* gmime/gmime-param.c: Disable debugging and conditionally disable
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index bd593217..791f0058 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -1230,10 +1230,11 @@ parser_construct_multipart (GMimeParser *parser, GMimeContentType *content_type,
 		if (*found == FOUND_END_BOUNDARY && found_immediate_boundary (priv)) {
 			/* eat end boundary */
 			parser_skip_line (parser);
+			parser_pop_boundary (parser);
 			*found = parser_scan_multipart_postface (parser, multipart);
+		} else {
+			parser_pop_boundary (parser);
 		}
-		
-		parser_pop_boundary (parser);
 	} else {
 		w(g_warning ("multipart without boundary encountered"));
 		/* this will scan everything into the preface */
diff --git a/gmime/url-scanner.c b/gmime/url-scanner.c
index dacc9770..fa69e735 100644
--- a/gmime/url-scanner.c
+++ b/gmime/url-scanner.c
@@ -139,6 +139,45 @@ enum {
 #define is_domain(x) ((url_scanner_table[(unsigned char)(x)] & IS_DOMAIN) != 0)
 #define is_urlsafe(x) ((url_scanner_table[(unsigned char)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0)
 
+/* Table of bracketing character pairs. Each entry couples an `open'
+ * character with the `close' character that terminates it; '|' acts
+ * as both opener and closer. is_open_brace() tests a character against
+ * the `open' members, and url_stop_at_brace() maps an opener to its
+ * `close' member. */
+static struct {
+	char open;
+	char close;
+} url_braces[] = {
+	{ '(', ')' },
+	{ '{', '}' },
+	{ '[', ']' },
+	{ '<', '>' },
+	{ '|', '|' },
+};
+
+/* Returns TRUE when @c equals the `open' member of any entry in
+ * url_braces[], FALSE otherwise. */
+static gboolean
+is_open_brace (char c)
+{
+	size_t remaining = G_N_ELEMENTS (url_braces);
+	
+	/* scan order does not matter: any hit yields TRUE */
+	while (remaining-- > 0) {
+		if (url_braces[remaining].open == c)
+			return TRUE;
+	}
+	
+	return FALSE;
+}
+
+/**
+ * url_stop_at_brace:
+ * @in: buffer being scanned
+ * @so: offset into @in; the character at @so - 1 is the one examined
+ *      (presumably the start offset of the current match)
+ *
+ * Checks whether the character immediately before offset @so is one of
+ * the opening characters listed in url_braces[].
+ *
+ * Returns: the closing character paired with that opener, or '\0' if
+ * @so is 0 or the preceding character is not a listed opener.
+ **/
+static char
+url_stop_at_brace (const char *in, size_t so)
+{
+	size_t i;
+	
+	/* nothing precedes offset 0, so there can be no opener */
+	if (so == 0)
+		return '\0';
+	
+	/* walk the whole table instead of a hard-coded count of 4 so
+	 * that every pair in url_braces[] (including '|') is honoured */
+	for (i = 0; i < G_N_ELEMENTS (url_braces); i++) {
+		if (in[so - 1] == url_braces[i].open)
+			return url_braces[i].close;
+	}
+	
+	return '\0';
+}
+
 
 gboolean
 g_url_addrspec_start (const char *in, const char *pos, const char *inend, urlmatch_t *match)
@@ -162,7 +201,7 @@ g_url_addrspec_start (const char *in, const char *pos, const char *inend, urlmat
 			inptr--;
 	}
 	
-	if (!is_atom (*inptr))
+	if (!is_atom (*inptr) || is_open_brace (*inptr))
 		inptr++;
 	
 	if (inptr == pos)
@@ -178,6 +217,7 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
 {
 	const char *inptr = pos;
 	int parts = 0, digits;
+	gboolean got_dot = FALSE;
 	
 	g_assert (*inptr == '@');
 	
@@ -204,6 +244,8 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
 			inptr++;
 		else
 			return FALSE;
+		
+		got_dot = TRUE;
 	} else {
 		while (inptr < inend) {
 			if (is_domain (*inptr))
@@ -214,12 +256,15 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
 			while (inptr < inend && is_domain (*inptr))
 				inptr++;
 			
-			if (inptr < inend && *inptr == '.' && is_domain (inptr[1]))
+			if (inptr < inend && *inptr == '.' && is_domain (inptr[1])) {
+				if (*inptr == '.')
+					got_dot = TRUE;
 				inptr++;
+			}
 		}
 	}
 	
-	if (inptr == pos + 1)
+	if (inptr == pos + 1 || !got_dot)
 		return FALSE;
 	
 	match->um_eo = (inptr - in);
@@ -228,32 +273,6 @@ g_url_addrspec_end (const char *in, const char *pos, const char *inend, urlmatch
 }
 
 
-static struct {
-	char open;
-	char close;
-} url_braces[] = {
-	{ '(', ')' },
-	{ '{', '}' },
-	{ '[', ']' },
-	{ '<', '>' },
-};
-
-static char
-url_stop_at_brace (const char *in, size_t so)
-{
-	int i;
-	
-	if (so > 0) {
-		for (i = 0; i < 4; i++) {
-			if (in[so - 1] == url_braces[i].open)
-				return url_braces[i].close;
-		}
-	}
-	
-	return '\0';
-}
-
-
 gboolean
 g_url_file_start (const char *in, const char *pos, const char *inend, urlmatch_t *match)
 {
@@ -323,7 +342,31 @@ g_url_web_end (const char *in, const char *pos, const char *inend, urlmatch_t *m
 				inptr++;
 			
 		} while (parts < 4);
+	} else if (is_atom (*inptr)) {
+		/* might be a domain or user@domain */
+		const char *save = inptr;
+		
+		while (inptr < inend) {
+			if (!is_atom (*inptr))
+				break;
+			
+			inptr++;
+			
+			while (inptr < inend && is_atom (*inptr))
+				inptr++;
+			
+			if (inptr < inend && *inptr == '.' && is_atom (inptr[1]))
+				inptr++;
+		}
+		
+		if (*inptr != '@')
+			inptr = save;
+		else
+			inptr++;
+		
+		goto domain;
 	} else if (is_domain (*inptr)) {
+	domain:
 		while (inptr < inend) {
 			if (is_domain (*inptr))
 				inptr++;
@@ -368,6 +411,13 @@ g_url_web_end (const char *in, const char *pos, const char *inend, urlmatch_t *m
 		}
 	}
 	
+	/* urls are extremely unlikely to end with any
+	 * punctuation, so strip any trailing
+	 * punctuation off. Also strip off any closing
+	 * braces or quotes. */
+	while (inptr > pos && strchr (",.:;?!-|)}]'\"", inptr[-1]))
+		inptr--;
+	
 	match->um_eo = (inptr - in);
 	
 	return TRUE;
author	Jeffrey Stedfast <fejj@ximian.com>	2004-02-14 01:49:56 +0000
committer	Jeffrey Stedfast <fejj@src.gnome.org>	2004-02-14 01:49:56 +0000
commit	ff0b0440dc0f21b1a6b51385fd990b4ff4862691 (patch)
tree	4f1e55ff5f0874d3ba5eb738631f010aae0f4ca8
parent	a8a536c9862522c7d4737b978073b1c96458afb5 (diff)
download	gmime-ff0b0440dc0f21b1a6b51385fd990b4ff4862691.tar.gz