/*
 * @file util.h Utility Functions
 * @ingroup core
 */

/* Purple is the legal property of its developers, whose names are too numerous
 * to list here.  Please refer to the COPYRIGHT file distributed with this
 * source distribution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
 */
#include "internal.h"

#include "cipher.h"
#include "conversation.h"
#include "core.h"
#include "debug.h"
#include "notify.h"
#include "prpl.h"
#include "prefs.h"
#include "util.h"

/*
 * Bookkeeping for one URL fetch.  The code that fills in and consumes these
 * fields lives elsewhere in this file.
 */
struct _PurpleUtilFetchUrlData
{
	PurpleUtilFetchUrlCallback callback;
	void *user_data;

	struct
	{
		char *user;
		char *passwd;
		char *address;
		int port;
		char *page;

	} website;

	char *url;
	int num_times_redirected;
	gboolean full;
	char *user_agent;
	gboolean http11;
	char *request;
	gsize request_written;
	gboolean include_headers;

	gboolean is_ssl;
	PurpleSslConnection *ssl_connection;
	PurpleProxyConnectData *connect_data;
	int fd;
	guint inpa;

	gboolean got_headers;
	gboolean has_explicit_data_len;
	char *webdata;
	gsize len;
	unsigned long data_len;
	gssize max_len;
	gboolean chunked;
};

/* Both strings are released by purple_util_uninit(). */
static char *custom_user_dir = NULL;
static char *user_dir = NULL;

/*
 * Allocates a menu action.
 *
 * The label is copied.  The callback, data and children pointers are stored
 * as given; purple_menu_action_free() releases only the label and the action
 * itself.
 */
PurpleMenuAction *
purple_menu_action_new(const char *label, PurpleCallback callback, gpointer data,
                       GList *children)
{
	PurpleMenuAction *act = g_new0(PurpleMenuAction, 1);

	act->label = g_strdup(label);
	act->callback = callback;
	act->data = data;
	act->children = children;

	return act;
}

/*
 * Frees an action created by purple_menu_action_new().  Only the label copy
 * and the structure are released; data and children are left untouched.
 */
void
purple_menu_action_free(PurpleMenuAction *act)
{
	g_return_if_fail(act != NULL);

	g_free(act->label);
	g_free(act);
}

void
purple_util_init(void)
{
	/* This does nothing right now.  It exists for symmetry with
	 * purple_util_uninit() and forwards compatibility. */
}

void
purple_util_uninit(void)
{
	/* Free these so we don't have leaks at shutdown. */

	g_free(custom_user_dir);
	custom_user_dir = NULL;

	g_free(user_dir);
	user_dir = NULL;
}

/**************************************************************************
 * Base16 Functions
 **************************************************************************/

/*
 * Encodes len bytes of data as lowercase hexadecimal.
 *
 * Returns a newly allocated, NUL-terminated string of 2 * len characters
 * that the caller must g_free(), or NULL if data is NULL or len is 0.
 */
gchar *
purple_base16_encode(const guchar *data, gsize len)
{
	gsize i; /* same type as len, so the comparison is never signed/unsigned */
	gchar *ascii = NULL;

	g_return_val_if_fail(data != NULL, NULL);
	g_return_val_if_fail(len > 0,   NULL);

	ascii = g_malloc(len * 2 + 1);

	/* Each call writes two digits plus a NUL that the next call overwrites. */
	for (i = 0; i < len; i++)
		g_snprintf(&ascii[i * 2], 3, "%02hhx", data[i]);

	return ascii;
}

/*
 * Decodes a hexadecimal string into raw bytes.
 *
 * str must be non-empty and have an even length.  Characters that are not
 * hexadecimal digits contribute a zero nibble (the historical, lenient
 * behaviour).  If ret_len is not NULL it receives the number of bytes
 * produced.
 *
 * Returns a newly allocated buffer of strlen(str) / 2 bytes (not
 * NUL-terminated) that the caller must g_free(), or NULL on invalid
 * arguments.
 */
guchar *
purple_base16_decode(const char *str, gsize *ret_len)
{
	gsize len, i;
	guint accumulator = 0;
	guchar *data;

	g_return_val_if_fail(str != NULL, NULL);

	len = strlen(str);

	g_return_val_if_fail(len > 0,      NULL);
	g_return_val_if_fail(len % 2 == 0, NULL);

	data = g_malloc(len / 2);

	for (i = 0; i < len; i++)
	{
		/* Locale independent and well defined for bytes >= 0x80, unlike
		 * isdigit()/tolower() on a plain char.  Yields -1 for non-hex. */
		gint nibble = g_ascii_xdigit_value(str[i]);

		if ((i % 2) == 0)
			accumulator = 0;
		else
			accumulator <<= 4;

		if (nibble > 0)
			accumulator |= (guint)nibble;

		/* The second digit of each pair completes one output byte. */
		if (i % 2)
			data[i / 2] = (guchar)accumulator;
	}

	if (ret_len != NULL)
		*ret_len = len / 2;

	return data;
}

/*
 * Encodes len bytes of data as colon-separated lowercase hexadecimal pairs,
 * e.g. "01:ab:ff".
 *
 * Returns a newly allocated string that the caller must g_free(), or NULL if
 * data is NULL or len is 0.
 */
gchar *
purple_base16_encode_chunked(const guchar *data, gsize len)
{
	gsize i;
	gchar *ascii = NULL;

	g_return_val_if_fail(data != NULL, NULL);
	g_return_val_if_fail(len > 0,   NULL);

	/* For each byte of input, we need 2 bytes for the hex representation
	 * and 1 for the colon.
	 * The final colon will be replaced by a terminating NULL
	 */
	ascii = g_malloc(len * 3 + 1);

	for (i = 0; i < len; i++)
		g_snprintf(&ascii[i * 3], 4, "%02hhx:", data[i]);

	/* Replace the final colon with NULL */
	ascii[len * 3 - 1] = 0;

	return ascii;
}

/**************************************************************************
 * Base64 Functions
 **************************************************************************/
static const char alphabet[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

static const char xdigits[] =
	"0123456789abcdef";

/* Thin wrapper around g_base64_encode(); the result must be g_free()d. */
gchar *
purple_base64_encode(const guchar *data, gsize len)
{
	return g_base64_encode(data, len);
}

/* Thin wrapper around g_base64_decode() that tolerates a NULL ret_len. */
guchar *
purple_base64_decode(const char *str, gsize *ret_len)
{
	/*
	 * We want to allow ret_len to be NULL for backward compatibility,
	 * but g_base64_decode() requires a valid length variable.  So if
	 * ret_len is NULL then pass in a dummy variable.
	 */
	gsize unused;
	return g_base64_decode(str, ret_len != NULL ? ret_len : &unused);
}

/**************************************************************************
 * Quoted Printable Functions (see RFC 2045).
**************************************************************************/ guchar * purple_quotedp_decode(const char *str, gsize *ret_len) { char *n, *new; const char *end, *p; n = new = g_malloc(strlen (str) + 1); end = str + strlen(str); for (p = str; p < end; p++, n++) { if (*p == '=') { if (p[1] == '\r' && p[2] == '\n') { /* 5.1 #5 */ n -= 1; p += 2; } else if (p[1] == '\n') { /* fuzzy case for 5.1 #5 */ n -= 1; p += 1; } else if (p[1] && p[2]) { char *nibble1 = strchr(xdigits, tolower(p[1])); char *nibble2 = strchr(xdigits, tolower(p[2])); if (nibble1 && nibble2) { /* 5.1 #1 */ *n = ((nibble1 - xdigits) << 4) | (nibble2 - xdigits); p += 2; } else { /* This should never happen */ *n = *p; } } else { /* This should never happen */ *n = *p; } } else if (*p == '_') *n = ' '; else *n = *p; } *n = '\0'; if (ret_len != NULL) *ret_len = n - new; /* Resize to take less space */ /* new = realloc(new, n - new); */ return (guchar *)new; } /************************************************************************** * MIME Functions **************************************************************************/ char * purple_mime_decode_field(const char *str) { /* * This is wing's version, partially based on revo/shx's version * See RFC2047 [which apparently obsoletes RFC1342] */ typedef enum { state_start, state_equal1, state_question1, state_charset, state_question2, state_encoding, state_question3, state_encoded_text, state_question4, state_equal2 = state_start } encoded_word_state_t; encoded_word_state_t state = state_start; const char *cur, *mark; const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL; GString *new; /* token can be any CHAR (supposedly ISO8859-1/ISO2022), not just ASCII */ #define token_char_p(c) \ (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c)) /* But encoded-text must be ASCII; alas, isascii() may not exist */ #define encoded_text_char_p(c) \ ((c & 0x80) == 0 && c != '?' 
&& c != ' ' && isgraph(c)) g_return_val_if_fail(str != NULL, NULL); new = g_string_new(NULL); /* Here we will be looking for encoded words and if they seem to be * valid then decode them. * They are of this form: =?charset?encoding?text?= */ for (cur = str, mark = NULL; *cur; cur += 1) { switch (state) { case state_equal1: if (*cur == '?') { state = state_question1; } else { g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_question1: if (token_char_p(*cur)) { charset0 = cur; state = state_charset; } else { /* This should never happen */ g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_charset: if (*cur == '?') { state = state_question2; } else if (!token_char_p(*cur)) { /* This should never happen */ g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_question2: if (token_char_p(*cur)) { encoding0 = cur; state = state_encoding; } else { /* This should never happen */ g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_encoding: if (*cur == '?') { state = state_question3; } else if (!token_char_p(*cur)) { /* This should never happen */ g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_question3: if (encoded_text_char_p(*cur)) { encoded_text0 = cur; state = state_encoded_text; } else if (*cur == '?') { /* empty string */ encoded_text0 = cur; state = state_question4; } else { /* This should never happen */ g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_encoded_text: if (*cur == '?') { state = state_question4; } else if (!encoded_text_char_p(*cur)) { g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; case state_question4: if (*cur == '=') { /* Got the whole encoded-word */ char *charset = g_strndup(charset0, encoding0 - charset0 - 1); char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1); char 
*encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1); guchar *decoded = NULL; gsize dec_len; if (g_ascii_strcasecmp(encoding, "Q") == 0) decoded = purple_quotedp_decode(encoded_text, &dec_len); else if (g_ascii_strcasecmp(encoding, "B") == 0) decoded = purple_base64_decode(encoded_text, &dec_len); else decoded = NULL; if (decoded) { gsize len; char *converted = g_convert((const gchar *)decoded, dec_len, "utf-8", charset, NULL, &len, NULL); if (converted) { g_string_append_len(new, converted, len); g_free(converted); } g_free(decoded); } g_free(charset); g_free(encoding); g_free(encoded_text); state = state_equal2; /* Restart the FSM */ } else { /* This should never happen */ g_string_append_len(new, mark, cur - mark + 1); state = state_start; } break; default: if (*cur == '=') { mark = cur; state = state_equal1; } else { /* Some unencoded text. */ g_string_append_c(new, *cur); } break; } /* switch */ } /* for */ if (state != state_start) g_string_append_len(new, mark, cur - mark + 1); return g_string_free(new, FALSE);; } /************************************************************************** * Date/Time Functions **************************************************************************/ const char *purple_get_tzoff_str(const struct tm *tm, gboolean iso) { static char buf[7]; long off; gint8 min; gint8 hrs; struct tm new_tm = *tm; mktime(&new_tm); if (new_tm.tm_isdst < 0) g_return_val_if_reached(""); #ifdef _WIN32 if ((off = wpurple_get_tz_offset()) == -1) return ""; #else # ifdef HAVE_TM_GMTOFF off = new_tm.tm_gmtoff; # else # ifdef HAVE_TIMEZONE tzset(); off = -1 * timezone; # endif /* HAVE_TIMEZONE */ # endif /* !HAVE_TM_GMTOFF */ #endif /* _WIN32 */ min = (off / 60) % 60; hrs = ((off / 60) - min) / 60; if(iso) { if (0 == off) { strcpy(buf, "Z"); } else { /* please leave the colons...they're optional for iso, but jabber * wants them */ if(g_snprintf(buf, sizeof(buf), "%+03d:%02d", hrs, ABS(min)) > 6) g_return_val_if_reached(""); } } else { if 
(g_snprintf(buf, sizeof(buf), "%+03d%02d", hrs, ABS(min)) > 5) g_return_val_if_reached(""); } return buf; } /* Windows doesn't HAVE_STRFTIME_Z_FORMAT, but this seems clearer. -- rlaager */ #if !defined(HAVE_STRFTIME_Z_FORMAT) || defined(_WIN32) static size_t purple_internal_strftime(char *s, size_t max, const char *format, const struct tm *tm) { const char *start; const char *c; char *fmt = NULL; /* Yes, this is checked in purple_utf8_strftime(), * but better safe than sorry. -- rlaager */ g_return_val_if_fail(format != NULL, 0); /* This is fairly efficient, and it only gets * executed on Windows or if the underlying * system doesn't support the %z format string, * for strftime() so I think it's good enough. * -- rlaager */ for (c = start = format; *c ; c++) { if (*c != '%') continue; c++; #ifndef HAVE_STRFTIME_Z_FORMAT if (*c == 'z') { char *tmp = g_strdup_printf("%s%.*s%s", fmt ? fmt : "", c - start - 1, start, purple_get_tzoff_str(tm, FALSE)); g_free(fmt); fmt = tmp; start = c + 1; } #endif #ifdef _WIN32 if (*c == 'Z') { char *tmp = g_strdup_printf("%s%.*s%s", fmt ? 
fmt : "", c - start - 1, start, wpurple_get_timezone_abbreviation(tm)); g_free(fmt); fmt = tmp; start = c + 1; } #endif } if (fmt != NULL) { size_t ret; if (*start) { char *tmp = g_strconcat(fmt, start, NULL); g_free(fmt); fmt = tmp; } ret = strftime(s, max, fmt, tm); g_free(fmt); return ret; } return strftime(s, max, format, tm); } #else /* HAVE_STRFTIME_Z_FORMAT && !_WIN32 */ #define purple_internal_strftime strftime #endif const char * purple_utf8_strftime(const char *format, const struct tm *tm) { static char buf[128]; char *locale; GError *err = NULL; int len; char *utf8; g_return_val_if_fail(format != NULL, NULL); if (tm == NULL) { time_t now = time(NULL); tm = localtime(&now); } locale = g_locale_from_utf8(format, -1, NULL, NULL, &err); if (err != NULL) { purple_debug_error("util", "Format conversion failed in purple_utf8_strftime(): %s\n", err->message); g_error_free(err); err = NULL; locale = g_strdup(format); } /* A return value of 0 is either an error (in * which case, the contents of the buffer are * undefined) or the empty string (in which * case, no harm is done here). */ if ((len = purple_internal_strftime(buf, sizeof(buf), locale, tm)) == 0) { g_free(locale); return ""; } g_free(locale); utf8 = g_locale_to_utf8(buf, len, NULL, NULL, &err); if (err != NULL) { purple_debug_error("util", "Result conversion failed in purple_utf8_strftime(): %s\n", err->message); g_error_free(err); } else { purple_strlcpy(buf, utf8); g_free(utf8); } return buf; } const char * purple_date_format_short(const struct tm *tm) { return purple_utf8_strftime("%x", tm); } const char * purple_date_format_long(const struct tm *tm) { /* * This string determines how some dates are displayed. The default * string "%x %X" shows the date then the time. Translators can * change this to "%X %x" if they want the time to be shown first, * followed by the date. 
*/ return purple_utf8_strftime(_("%x %X"), tm); } const char * purple_date_format_full(const struct tm *tm) { return purple_utf8_strftime("%c", tm); } const char * purple_time_format(const struct tm *tm) { return purple_utf8_strftime("%X", tm); } time_t purple_time_build(int year, int month, int day, int hour, int min, int sec) { struct tm tm; tm.tm_year = year - 1900; tm.tm_mon = month - 1; tm.tm_mday = day; tm.tm_hour = hour; tm.tm_min = min; tm.tm_sec = sec >= 0 ? sec : time(NULL) % 60; return mktime(&tm); } time_t purple_str_to_time(const char *timestamp, gboolean utc, struct tm *tm, long *tz_off, const char **rest) { time_t retval = 0; static struct tm t; const char *c = timestamp; int year = 0; long tzoff = PURPLE_NO_TZ_OFF; time(&retval); localtime_r(&retval, &t); if (rest != NULL) *rest = NULL; /* 4 digit year */ if (sscanf(c, "%04d", &year) && year > 1900) { c += 4; if (*c == '-') c++; t.tm_year = year - 1900; } /* 2 digit month */ if (!sscanf(c, "%02d", &t.tm_mon)) { if (rest != NULL && *c != '\0') *rest = c; return 0; } c += 2; if (*c == '-' || *c == '/') c++; t.tm_mon -= 1; /* 2 digit day */ if (!sscanf(c, "%02d", &t.tm_mday)) { if (rest != NULL && *c != '\0') *rest = c; return 0; } c += 2; if (*c == '/') { c++; if (!sscanf(c, "%04d", &t.tm_year)) { if (rest != NULL && *c != '\0') *rest = c; return 0; } t.tm_year -= 1900; } else if (*c == 'T' || *c == '.') { c++; /* we have more than a date, keep going */ /* 2 digit hour */ if ((sscanf(c, "%02d:%02d:%02d", &t.tm_hour, &t.tm_min, &t.tm_sec) == 3 && (c = c + 8)) || (sscanf(c, "%02d%02d%02d", &t.tm_hour, &t.tm_min, &t.tm_sec) == 3 && (c = c + 6))) { gboolean offset_positive = FALSE; int tzhrs; int tzmins; t.tm_isdst = -1; if (*c == '.') { do { c++; } while (*c >= '0' && *c <= '9'); /* dealing with precision we don't care about */ } if (*c == '+') offset_positive = TRUE; if (((*c == '+' || *c == '-') && (c = c + 1)) && ((sscanf(c, "%02d:%02d", &tzhrs, &tzmins) == 2 && (c = c + 5)) || (sscanf(c, "%02d%02d", 
&tzhrs, &tzmins) == 2 && (c = c + 4)))) { tzoff = tzhrs*60*60 + tzmins*60; if (offset_positive) tzoff *= -1; } else if ((*c == 'Z') && (c = c + 1)) { /* 'Z' = Zulu = UTC */ tzoff = 0; } else if (utc) { static struct tm tmptm; time_t tmp; tmp = mktime(&t); /* we care about whether it *was* dst, and the offset, here on this * date, not whether we are currently observing dst locally *now*. * This isn't perfect, because we would need to know in advance the * offset we are trying to work out in advance to be sure this * works for times around dst transitions but it'll have to do. */ localtime_r(&tmp, &tmptm); t.tm_isdst = tmptm.tm_isdst; #ifdef HAVE_TM_GMTOFF t.tm_gmtoff = tmptm.tm_gmtoff; #endif } if (rest != NULL && *c != '\0') { if (*c == ' ') c++; if (*c != '\0') *rest = c; } if (tzoff != PURPLE_NO_TZ_OFF || utc) { #if defined(_WIN32) long sys_tzoff; #endif #if defined(_WIN32) || defined(HAVE_TM_GMTOFF) || defined (HAVE_TIMEZONE) if (tzoff == PURPLE_NO_TZ_OFF) tzoff = 0; #endif #ifdef _WIN32 if ((sys_tzoff = wpurple_get_tz_offset()) == -1) tzoff = PURPLE_NO_TZ_OFF; else tzoff += sys_tzoff; #else #ifdef HAVE_TM_GMTOFF tzoff += t.tm_gmtoff; #else # ifdef HAVE_TIMEZONE tzset(); /* making sure */ tzoff -= timezone; # endif #endif #endif /* _WIN32 */ } } else { if (rest != NULL && *c != '\0') *rest = c; } } retval = mktime(&t); if (tm != NULL) *tm = t; if (tzoff != PURPLE_NO_TZ_OFF) retval += tzoff; if (tz_off != NULL) *tz_off = tzoff; return retval; } /************************************************************************** * Markup Functions **************************************************************************/ /* * This function is stolen from glib's gmarkup.c and modified to not * replace ' with ' */ static void append_escaped_text(GString *str, const gchar *text, gssize length) { const gchar *p; const gchar *end; gunichar c; p = text; end = text + length; while (p != end) { const gchar *next; next = g_utf8_next_char (p); switch (*p) { case '&': 
g_string_append (str, "&"); break; case '<': g_string_append (str, "<"); break; case '>': g_string_append (str, ">"); break; case '"': g_string_append (str, """); break; default: c = g_utf8_get_char (p); if ((0x1 <= c && c <= 0x8) || (0xb <= c && c <= 0xc) || (0xe <= c && c <= 0x1f) || (0x7f <= c && c <= 0x84) || (0x86 <= c && c <= 0x9f)) g_string_append_printf (str, "&#x%x;", c); else g_string_append_len (str, p, next - p); break; } p = next; } } /* This function is stolen from glib's gmarkup.c */ gchar *purple_markup_escape_text(const gchar *text, gssize length) { GString *str; g_return_val_if_fail(text != NULL, NULL); if (length < 0) length = strlen(text); /* prealloc at least as long as original text */ str = g_string_sized_new(length); append_escaped_text(str, text, length); return g_string_free(str, FALSE); } const char * purple_markup_unescape_entity(const char *text, int *length) { const char *pln; int len, pound; char temp[2]; if (!text || *text != '&') return NULL; #define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1))) if(IS_ENTITY("&")) pln = "&"; else if(IS_ENTITY("<")) pln = "<"; else if(IS_ENTITY(">")) pln = ">"; else if(IS_ENTITY(" ")) pln = " "; else if(IS_ENTITY("©")) pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */ else if(IS_ENTITY(""")) pln = "\""; else if(IS_ENTITY("®")) pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */ else if(IS_ENTITY("'")) pln = "\'"; else if(*(text+1) == '#' && (sscanf(text, "&#%u%1[;]", £, temp) == 2 || sscanf(text, "&#x%x%1[;]", £, temp) == 2) && pound != 0) { static char buf[7]; int buflen = g_unichar_to_utf8((gunichar)pound, buf); buf[buflen] = '\0'; pln = buf; len = (*(text+2) == 'x' ? 
3 : 2); while(isxdigit((gint) text[len])) len++; if(text[len] == ';') len++; } else return NULL; if (length) *length = len; return pln; } char * purple_markup_get_css_property(const gchar *style, const gchar *opt) { const gchar *css_str = style; const gchar *css_value_start; const gchar *css_value_end; gchar *tmp; gchar *ret; g_return_val_if_fail(opt != NULL, NULL); if (!css_str) return NULL; /* find the CSS property */ while (1) { /* skip whitespace characters */ while (*css_str && g_ascii_isspace(*css_str)) css_str++; if (!g_ascii_isalpha(*css_str)) return NULL; if (g_ascii_strncasecmp(css_str, opt, strlen(opt))) { /* go to next css property positioned after the next ';' */ while (*css_str && *css_str != '"' && *css_str != ';') css_str++; if(*css_str != ';') return NULL; css_str++; } else break; } /* find the CSS value position in the string */ css_str += strlen(opt); while (*css_str && g_ascii_isspace(*css_str)) css_str++; if (*css_str != ':') return NULL; css_str++; while (*css_str && g_ascii_isspace(*css_str)) css_str++; if (*css_str == '\0' || *css_str == '"' || *css_str == ';') return NULL; /* mark the CSS value */ css_value_start = css_str; while (*css_str && *css_str != '"' && *css_str != ';') css_str++; css_value_end = css_str - 1; /* Removes trailing whitespace */ while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end)) css_value_end--; tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1); ret = purple_unescape_html(tmp); g_free(tmp); return ret; } gboolean purple_markup_is_rtl(const char *html) { GData *attributes; const gchar *start, *end; gboolean res = FALSE; if (purple_markup_find_tag("span", html, &start, &end, &attributes)) { /* tmp is a member of attributes and is free with g_datalist_clear call */ const char *tmp = g_datalist_get_data(&attributes, "dir"); if (tmp && !g_ascii_strcasecmp(tmp, "RTL")) res = TRUE; if (!res) { tmp = g_datalist_get_data(&attributes, "style"); if (tmp) { char *tmp2 = 
purple_markup_get_css_property(tmp, "direction"); if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL")) res = TRUE; g_free(tmp2); } } g_datalist_clear(&attributes); } return res; } gboolean purple_markup_find_tag(const char *needle, const char *haystack, const char **start, const char **end, GData **attributes) { GData *attribs; const char *cur = haystack; char *name = NULL; gboolean found = FALSE; gboolean in_tag = FALSE; gboolean in_attr = FALSE; const char *in_quotes = NULL; size_t needlelen; g_return_val_if_fail( needle != NULL, FALSE); g_return_val_if_fail( *needle != '\0', FALSE); g_return_val_if_fail( haystack != NULL, FALSE); g_return_val_if_fail( start != NULL, FALSE); g_return_val_if_fail( end != NULL, FALSE); g_return_val_if_fail(attributes != NULL, FALSE); needlelen = strlen(needle); g_datalist_init(&attribs); while (*cur && !found) { if (in_tag) { if (in_quotes) { const char *close = cur; while (*close && *close != *in_quotes) close++; /* if we got the close quote, store the value and carry on from * * after it. if we ran to the end of the string, point to the NULL * * and we're outta here */ if (*close) { /* only store a value if we have an attribute name */ if (name) { size_t len = close - cur; char *val = g_strndup(cur, len); g_datalist_set_data_full(&attribs, name, val, g_free); g_free(name); name = NULL; } in_quotes = NULL; cur = close + 1; } else { cur = close; } } else if (in_attr) { const char *close = cur; while (*close && *close != '>' && *close != '"' && *close != '\'' && *close != ' ' && *close != '=') close++; /* if we got the equals, store the name of the attribute. if we got * the quote, save the attribute and go straight to quote mode. 
* otherwise the tag closed or we reached the end of the string, * so we can get outta here */ switch (*close) { case '"': case '\'': in_quotes = close; case '=': { size_t len = close - cur; /* don't store a blank attribute name */ if (len) { g_free(name); name = g_ascii_strdown(cur, len); } in_attr = FALSE; cur = close + 1; break; } case ' ': case '>': in_attr = FALSE; default: cur = close; break; } } else { switch (*cur) { case ' ': /* swallow extra spaces inside tag */ while (*cur && *cur == ' ') cur++; in_attr = TRUE; break; case '>': found = TRUE; *end = cur; break; case '"': case '\'': in_quotes = cur; default: cur++; break; } } } else { /* if we hit a < followed by the name of our tag... */ if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) { *start = cur; cur = cur + needlelen + 1; /* if we're pointing at a space or a >, we found the right tag. if * * we're not, we've found a longer tag, so we need to skip to the * * >, but not being distracted by >s inside quotes. 
*/ if (*cur == ' ' || *cur == '>') { in_tag = TRUE; } else { while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') { if (*cur == '"') { cur++; while (*cur && *cur != '"') cur++; } else if (*cur == '\'') { cur++; while (*cur && *cur != '\'') cur++; } else { cur++; } } } } else { cur++; } } } /* clean up any attribute name from a premature termination */ g_free(name); if (found) { *attributes = attribs; } else { *start = NULL; *end = NULL; *attributes = NULL; } return found; } gboolean purple_markup_extract_info_field(const char *str, int len, PurpleNotifyUserInfo *user_info, const char *start_token, int skip, const char *end_token, char check_value, const char *no_value_token, const char *display_name, gboolean is_link, const char *link_prefix, PurpleInfoFieldFormatCallback format_cb) { const char *p, *q; g_return_val_if_fail(str != NULL, FALSE); g_return_val_if_fail(user_info != NULL, FALSE); g_return_val_if_fail(start_token != NULL, FALSE); g_return_val_if_fail(end_token != NULL, FALSE); g_return_val_if_fail(display_name != NULL, FALSE); p = strstr(str, start_token); if (p == NULL) return FALSE; p += strlen(start_token) + skip; if (p >= str + len) return FALSE; if (check_value != '\0' && *p == check_value) return FALSE; q = strstr(p, end_token); /* Trim leading blanks */ while (*p != '\n' && g_ascii_isspace(*p)) { p += 1; } /* Trim trailing blanks */ while (q > p && g_ascii_isspace(*(q - 1))) { q -= 1; } /* Don't bother with null strings */ if (p == q) return FALSE; if (q != NULL && (!no_value_token || (no_value_token && strncmp(p, no_value_token, strlen(no_value_token))))) { GString *dest = g_string_new(""); if (is_link) { g_string_append(dest, ""); if (link_prefix) g_string_append(dest, link_prefix); g_string_append_len(dest, p, q - p); g_string_append(dest, ""); } else { if (format_cb != NULL) { char *reformatted = format_cb(p, q - p); g_string_append(dest, reformatted); g_free(reformatted); } else g_string_append_len(dest, p, q - p); } 
purple_notify_user_info_add_pair(user_info, display_name, dest->str); g_string_free(dest, TRUE); return TRUE; } return FALSE; } struct purple_parse_tag { char *src_tag; char *dest_tag; gboolean ignore; }; /* NOTE: Do not put `do {} while(0)` around this macro (as this is the method recommended in the GCC docs). It contains 'continue's that should affect the while-loop in purple_markup_html_to_xhtml and doing the above would break that. Also, remember to put braces in constructs that require them for multiple statements when using this macro. */ #define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ const char *o = c + strlen("<" x); \ const char *p = NULL, *q = NULL, *r = NULL; \ /* o = iterating over full tag \ * p = > (end of tag) \ * q = start of quoted bit \ * r = < inside tag \ */ \ GString *innards = g_string_new(""); \ while(o && *o) { \ if(!q && (*o == '\"' || *o == '\'') ) { \ q = o; \ } else if(q) { \ if(*o == *q) { /* end of quoted bit */ \ char *unescaped = g_strndup(q+1, o-q-1); \ char *escaped = g_markup_escape_text(unescaped, -1); \ g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ g_free(unescaped); \ g_free(escaped); \ q = NULL; \ } else if(*c == '\\') { \ o++; \ } \ } else if(*o == '<') { \ r = o; \ } else if(*o == '>') { \ p = o; \ break; \ } else { \ innards = g_string_append_c(innards, *o); \ } \ o++; \ } \ if(p && !r) { /* got an end of tag and no other < earlier */\ if(*(p-1) != '/') { \ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \ pt->src_tag = x; \ pt->dest_tag = y; \ tags = g_list_prepend(tags, pt); \ } \ if(xhtml) { \ xhtml = g_string_append(xhtml, "<" y); \ xhtml = g_string_append(xhtml, innards->str); \ xhtml = g_string_append_c(xhtml, '>'); \ } \ c = p + 1; \ } else { /* got end of tag with earlier < *or* didn't get anything */ \ if(xhtml) \ xhtml = g_string_append(xhtml, "<"); \ if(plain) \ plain = g_string_append_c(plain, '<'); \ c++; \ } \ g_string_free(innards, 
TRUE); \ continue; \ } \ if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ (*(c+strlen("<" x)) == '>' || \ !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ if(xhtml) \ xhtml = g_string_append(xhtml, "<" y); \ c += strlen("<" x); \ if(*c != '/') { \ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \ pt->src_tag = x; \ pt->dest_tag = y; \ tags = g_list_prepend(tags, pt); \ if(xhtml) \ xhtml = g_string_append_c(xhtml, '>'); \ } else { \ if(xhtml) \ xhtml = g_string_append(xhtml, "/>");\ } \ c = strchr(c, '>') + 1; \ continue; \ } /* Don't forget to check the note above for ALLOW_TAG_ALT. */ #define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) void purple_markup_html_to_xhtml(const char *html, char **xhtml_out, char **plain_out) { GString *xhtml = NULL; GString *plain = NULL; GString *url = NULL; GString *cdata = NULL; GList *tags = NULL, *tag; const char *c = html; char quote = '\0'; #define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \ quote = *(ptr++); \ else \ quote = '\0'; #define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>'))) g_return_if_fail(xhtml_out != NULL || plain_out != NULL); if(xhtml_out) xhtml = g_string_new(""); if(plain_out) plain = g_string_new(""); while(c && *c) { if(*c == '<') { if(*(c+1) == '/') { /* closing tag */ tag = tags; while(tag) { struct purple_parse_tag *pt = tag->data; if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { c += strlen(pt->src_tag) + 3; break; } tag = tag->next; } if(tag) { while(tags) { struct purple_parse_tag *pt = tags->data; if(xhtml && !pt->ignore) g_string_append_printf(xhtml, "", pt->dest_tag); if(plain && purple_strequal(pt->src_tag, "a")) { /* if this is a link, we have to add the url to the plaintext, too */ if (cdata && url && (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 || g_utf8_collate(url->str + 7, cdata->str) != 0))) g_string_append_printf(plain, " 
<%s>", g_strstrip(url->str)); if (cdata) { g_string_free(cdata, TRUE); cdata = NULL; } } if(tags == tag) break; tags = g_list_remove(tags, pt); g_free(pt); } g_free(tag->data); tags = g_list_remove(tags, tag->data); } else { /* a closing tag we weren't expecting... * we'll let it slide, if it's really a tag...if it's * just a ') { c = end+1; } else { if(xhtml) xhtml = g_string_append(xhtml, "<"); if(plain) plain = g_string_append_c(plain, '<'); c++; } } } else { /* opening tag */ ALLOW_TAG("blockquote"); ALLOW_TAG("cite"); ALLOW_TAG("div"); ALLOW_TAG("em"); ALLOW_TAG("h1"); ALLOW_TAG("h2"); ALLOW_TAG("h3"); ALLOW_TAG("h4"); ALLOW_TAG("h5"); ALLOW_TAG("h6"); /* we only allow html to start the message */ if(c == html) { ALLOW_TAG("html"); } ALLOW_TAG_ALT("i", "em"); ALLOW_TAG_ALT("italic", "em"); ALLOW_TAG("li"); ALLOW_TAG("ol"); ALLOW_TAG("p"); ALLOW_TAG("pre"); ALLOW_TAG("q"); ALLOW_TAG("span"); ALLOW_TAG("ul"); /* we skip
because it's not legal in XHTML-IM. However, * we still want to send something sensible, so we put a * linebreak in its place.
also needs special handling * because putting a
to close it would just be dumb. */ if((!g_ascii_strncasecmp(c, "' || !g_ascii_strncasecmp(c+3, "/>", 2) || !g_ascii_strncasecmp(c+3, " />", 3))) { c = strchr(c, '>') + 1; if(xhtml) xhtml = g_string_append(xhtml, "
"); if(plain && *c != '\n') plain = g_string_append_c(plain, '\n'); continue; } if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen("")) || !g_ascii_strncasecmp(c, "", strlen(""))) { struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); if (*(c+2) == '>') pt->src_tag = "b"; else if (*(c+2) == 'o') pt->src_tag = "bold"; else pt->src_tag = "strong"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; if(xhtml) xhtml = g_string_append(xhtml, ""); continue; } if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen(""))) { struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); pt->src_tag = *(c+2) == '>' ? "u" : "underline"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; if (xhtml) xhtml = g_string_append(xhtml, ""); continue; } if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen(""))) { struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); pt->src_tag = *(c+2) == '>' ? 
"s" : "strike"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; if(xhtml) xhtml = g_string_append(xhtml, ""); continue; } if(!g_ascii_strncasecmp(c, "", 5)) { struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); pt->src_tag = "sub"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; if(xhtml) xhtml = g_string_append(xhtml, ""); continue; } if(!g_ascii_strncasecmp(c, "", 5)) { struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); pt->src_tag = "sup"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; if(xhtml) xhtml = g_string_append(xhtml, ""); continue; } if (!g_ascii_strncasecmp(c, "' || *(c+4) == ' ')) { const char *p = c + 4; GString *src = NULL, *alt = NULL; while (*p && *p != '>') { if (!g_ascii_strncasecmp(p, "src=", 4)) { const char *q = p + 4; if (src) g_string_free(src, TRUE); src = g_string_new(""); CHECK_QUOTE(q); while (VALID_CHAR(q)) { src = g_string_append_c(src, *q); q++; } p = q; } else if (!g_ascii_strncasecmp(p, "alt=", 4)) { const char *q = p + 4; if (alt) g_string_free(alt, TRUE); alt = g_string_new(""); CHECK_QUOTE(q); while (VALID_CHAR(q)) { alt = g_string_append_c(alt, *q); q++; } p = q; } else { p++; } } if ((c = strchr(p, '>')) != NULL) c++; else c = p; /* src and alt are required! */ if(src && xhtml) g_string_append_printf(xhtml, "%s", g_strstrip(src->str), alt ? 
alt->str : ""); if(alt) { if(plain) plain = g_string_append(plain, alt->str); if(!src && xhtml) xhtml = g_string_append(xhtml, alt->str); g_string_free(alt, TRUE); } g_string_free(src, TRUE); continue; } if (!g_ascii_strncasecmp(c, "' || *(c+2) == ' ')) { const char *p = c + 2; struct purple_parse_tag *pt; while (*p && *p != '>') { if (!g_ascii_strncasecmp(p, "href=", 5)) { const char *q = p + 5; if (url) g_string_free(url, TRUE); url = g_string_new(""); if (cdata) g_string_free(cdata, TRUE); cdata = g_string_new(""); CHECK_QUOTE(q); while (VALID_CHAR(q)) { int len; if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL)) url = g_string_append(url, "&"); else url = g_string_append_c(url, *q); q++; } p = q; } else { p++; } } if ((c = strchr(p, '>')) != NULL) c++; else c = p; pt = g_new0(struct purple_parse_tag, 1); pt->src_tag = "a"; pt->dest_tag = "a"; tags = g_list_prepend(tags, pt); if(xhtml) g_string_append_printf(xhtml, "", url ? g_strstrip(url->str) : ""); continue; } if(!g_ascii_strncasecmp(c, "' || *(c+5) == ' ')) { const char *p = c + 5; GString *style = g_string_new(""); struct purple_parse_tag *pt; while (*p && *p != '>') { if (!g_ascii_strncasecmp(p, "back=", 5)) { const char *q = p + 5; GString *color = g_string_new(""); CHECK_QUOTE(q); while (VALID_CHAR(q)) { color = g_string_append_c(color, *q); q++; } g_string_append_printf(style, "background: %s; ", color->str); g_string_free(color, TRUE); p = q; } else if (!g_ascii_strncasecmp(p, "color=", 6)) { const char *q = p + 6; GString *color = g_string_new(""); CHECK_QUOTE(q); while (VALID_CHAR(q)) { color = g_string_append_c(color, *q); q++; } g_string_append_printf(style, "color: %s; ", color->str); g_string_free(color, TRUE); p = q; } else if (!g_ascii_strncasecmp(p, "face=", 5)) { const char *q = p + 5; GString *face = g_string_new(""); CHECK_QUOTE(q); while (VALID_CHAR(q)) { face = g_string_append_c(face, *q); q++; } g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str)); 
g_string_free(face, TRUE); p = q; } else if (!g_ascii_strncasecmp(p, "size=", 5)) { const char *q = p + 5; int sz; const char *size = "medium"; CHECK_QUOTE(q); sz = atoi(q); switch (sz) { case 1: size = "xx-small"; break; case 2: size = "small"; break; case 3: size = "medium"; break; case 4: size = "large"; break; case 5: size = "x-large"; break; case 6: case 7: size = "xx-large"; break; default: break; } g_string_append_printf(style, "font-size: %s; ", size); p = q; } else { p++; } } if ((c = strchr(p, '>')) != NULL) c++; else c = p; pt = g_new0(struct purple_parse_tag, 1); pt->src_tag = "font"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); if(style->len && xhtml) g_string_append_printf(xhtml, "", g_strstrip(style->str)); else pt->ignore = TRUE; g_string_free(style, TRUE); continue; } if (!g_ascii_strncasecmp(c, "", g_strstrip(color->str)); g_string_free(color, TRUE); if ((c = strchr(p, '>')) != NULL) c++; else c = p; pt->src_tag = "body"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); did_something = TRUE; break; } p++; } if (did_something) continue; } /* this has to come after the special case for bgcolor */ ALLOW_TAG("body"); if(!g_ascii_strncasecmp(c, ""); if(p) { if(xhtml) xhtml = g_string_append(xhtml, "