diff options
author | Gary Kramlich <grim@reaperworld.com> | 2020-10-16 02:27:21 -0500 |
---|---|---|
committer | Gary Kramlich <grim@reaperworld.com> | 2020-10-16 02:27:21 -0500 |
commit | 4dd57d497e1b638d808ab6548329859f150f7e6e (patch) | |
tree | 17ecb67ca0a05296edbd1ddb07c68ca2b908c8d1 /libpurple/util.c | |
parent | 22ca01c264a671b0060469a7f6e9251a3a3447d7 (diff) | |
download | pidgin-4dd57d497e1b638d808ab6548329859f150f7e6e.tar.gz |
Pull the purple_markup_* API out of util.[ch] and into purplemarkup.[ch]. No code was changed; it was just moved from one file to the other.
Testing Done:
Compile and unit tests.
Reviewed at https://reviews.imfreedom.org/r/171/
Diffstat (limited to 'libpurple/util.c')
-rw-r--r-- | libpurple/util.c | 1540 |
1 files changed, 0 insertions, 1540 deletions
diff --git a/libpurple/util.c b/libpurple/util.c index ae4702935c..12386ecd54 100644 --- a/libpurple/util.c +++ b/libpurple/util.c @@ -449,1546 +449,6 @@ gint purple_time_parse_month(const char *month_abbr) } /************************************************************************** - * Markup Functions - **************************************************************************/ - -/* - * This function is stolen from glib's gmarkup.c and modified to not - * replace ' with ' - */ -static void append_escaped_text(GString *str, - const gchar *text, gssize length) -{ - const gchar *p; - const gchar *end; - gunichar c; - - p = text; - end = text + length; - - while (p != end) - { - const gchar *next; - next = g_utf8_next_char (p); - - switch (*p) - { - case '&': - g_string_append (str, "&"); - break; - - case '<': - g_string_append (str, "<"); - break; - - case '>': - g_string_append (str, ">"); - break; - - case '"': - g_string_append (str, """); - break; - - default: - c = g_utf8_get_char (p); - if ((0x1 <= c && c <= 0x8) || - (0xb <= c && c <= 0xc) || - (0xe <= c && c <= 0x1f) || - (0x7f <= c && c <= 0x84) || - (0x86 <= c && c <= 0x9f)) - g_string_append_printf (str, "&#x%x;", c); - else - g_string_append_len (str, p, next - p); - break; - } - - p = next; - } -} - -/* This function is stolen from glib's gmarkup.c */ -gchar *purple_markup_escape_text(const gchar *text, gssize length) -{ - GString *str; - - g_return_val_if_fail(text != NULL, NULL); - - if (length < 0) - length = strlen(text); - - /* prealloc at least as long as original text */ - str = g_string_sized_new(length); - append_escaped_text(str, text, length); - - return g_string_free(str, FALSE); -} - -const char * -purple_markup_unescape_entity(const char *text, int *length) -{ - const char *pln; - int len; - - if (!text || *text != '&') - return NULL; - -#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1))) - - if(IS_ENTITY("&")) - pln = "&"; - else if(IS_ENTITY("<")) - pln = "<"; 
- else if(IS_ENTITY(">")) - pln = ">"; - else if(IS_ENTITY(" ")) - pln = " "; - else if(IS_ENTITY("©")) - pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */ - else if(IS_ENTITY(""")) - pln = "\""; - else if(IS_ENTITY("®")) - pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */ - else if(IS_ENTITY("'")) - pln = "\'"; - else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) { - static char buf[7]; - const char *start = text + 2; - char *end; - guint64 pound; - int base = 10; - int buflen; - - if (*start == 'x') { - base = 16; - start++; - } - - pound = g_ascii_strtoull(start, &end, base); - if (pound == 0 || pound > INT_MAX || *end != ';') { - return NULL; - } - - len = (end - text) + 1; - - buflen = g_unichar_to_utf8((gunichar)pound, buf); - buf[buflen] = '\0'; - pln = buf; - } - else - return NULL; - - if (length) - *length = len; - return pln; -} - -char * -purple_markup_get_css_property(const gchar *style, - const gchar *opt) -{ - const gchar *css_str = style; - const gchar *css_value_start; - const gchar *css_value_end; - gchar *tmp; - gchar *ret; - - g_return_val_if_fail(opt != NULL, NULL); - - if (!css_str) - return NULL; - - /* find the CSS property */ - while (1) - { - /* skip whitespace characters */ - while (*css_str && g_ascii_isspace(*css_str)) - css_str++; - if (!g_ascii_isalpha(*css_str)) - return NULL; - if (g_ascii_strncasecmp(css_str, opt, strlen(opt))) - { - /* go to next css property positioned after the next ';' */ - while (*css_str && *css_str != '"' && *css_str != ';') - css_str++; - if(*css_str != ';') - return NULL; - css_str++; - } - else - break; - } - - /* find the CSS value position in the string */ - css_str += strlen(opt); - while (*css_str && g_ascii_isspace(*css_str)) - css_str++; - if (*css_str != ':') - return NULL; - css_str++; - while (*css_str && g_ascii_isspace(*css_str)) - css_str++; - if (*css_str == '\0' || *css_str == '"' || *css_str == ';') - return NULL; - - /* mark the CSS value */ - 
css_value_start = css_str; - while (*css_str && *css_str != '"' && *css_str != ';') - css_str++; - css_value_end = css_str - 1; - - /* Removes trailing whitespace */ - while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end)) - css_value_end--; - - tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1); - ret = purple_unescape_html(tmp); - g_free(tmp); - - return ret; -} - -gboolean purple_markup_is_rtl(const char *html) -{ - GData *attributes; - const gchar *start, *end; - gboolean res = FALSE; - - if (purple_markup_find_tag("span", html, &start, &end, &attributes)) - { - /* tmp is a member of attributes and is free with g_datalist_clear call */ - const char *tmp = g_datalist_get_data(&attributes, "dir"); - if (tmp && !g_ascii_strcasecmp(tmp, "RTL")) - res = TRUE; - if (!res) - { - tmp = g_datalist_get_data(&attributes, "style"); - if (tmp) - { - char *tmp2 = purple_markup_get_css_property(tmp, "direction"); - if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL")) - res = TRUE; - g_free(tmp2); - } - - } - g_datalist_clear(&attributes); - } - return res; -} - -gboolean -purple_markup_find_tag(const char *needle, const char *haystack, - const char **start, const char **end, GData **attributes) -{ - GData *attribs; - const char *cur = haystack; - char *name = NULL; - gboolean found = FALSE; - gboolean in_tag = FALSE; - gboolean in_attr = FALSE; - const char *in_quotes = NULL; - size_t needlelen; - - g_return_val_if_fail( needle != NULL, FALSE); - g_return_val_if_fail( *needle != '\0', FALSE); - g_return_val_if_fail( haystack != NULL, FALSE); - g_return_val_if_fail( start != NULL, FALSE); - g_return_val_if_fail( end != NULL, FALSE); - g_return_val_if_fail(attributes != NULL, FALSE); - - needlelen = strlen(needle); - g_datalist_init(&attribs); - - while (*cur && !found) { - if (in_tag) { - if (in_quotes) { - const char *close = cur; - - while (*close && *close != *in_quotes) - close++; - - /* if we got the close quote, store the value and carry 
on from * - * after it. if we ran to the end of the string, point to the NULL * - * and we're outta here */ - if (*close) { - /* only store a value if we have an attribute name */ - if (name) { - size_t len = close - cur; - char *val = g_strndup(cur, len); - - g_datalist_set_data_full(&attribs, name, val, g_free); - g_free(name); - name = NULL; - } - - in_quotes = NULL; - cur = close + 1; - } else { - cur = close; - } - } else if (in_attr) { - const char *close = cur; - - while (*close && *close != '>' && *close != '"' && - *close != '\'' && *close != ' ' && *close != '=') - close++; - - /* if we got the equals, store the name of the attribute. if we got - * the quote, save the attribute and go straight to quote mode. - * otherwise the tag closed or we reached the end of the string, - * so we can get outta here */ - switch (*close) { - case '"': - case '\'': - in_quotes = close; - /* fall through */ - case '=': - { - size_t len = close - cur; - - /* don't store a blank attribute name */ - if (len) { - g_free(name); - name = g_ascii_strdown(cur, len); - } - - in_attr = FALSE; - cur = close + 1; - } - break; - case ' ': - case '>': - in_attr = FALSE; - /* fall through */ - default: - cur = close; - break; - } - } else { - switch (*cur) { - case ' ': - /* swallow extra spaces inside tag */ - while (*cur && *cur == ' ') cur++; - in_attr = TRUE; - break; - case '>': - found = TRUE; - *end = cur; - break; - case '"': - case '\'': - in_quotes = cur; - /* fall through */ - default: - cur++; - break; - } - } - } else { - /* if we hit a < followed by the name of our tag... */ - if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) { - *start = cur; - cur = cur + needlelen + 1; - - /* if we're pointing at a space or a >, we found the right tag. if * - * we're not, we've found a longer tag, so we need to skip to the * - * >, but not being distracted by >s inside quotes. 
*/ - if (*cur == ' ' || *cur == '>') { - in_tag = TRUE; - } else { - while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') { - if (*cur == '"') { - cur++; - while (*cur && *cur != '"') - cur++; - } else if (*cur == '\'') { - cur++; - while (*cur && *cur != '\'') - cur++; - } else { - cur++; - } - } - } - } else { - cur++; - } - } - } - - /* clean up any attribute name from a premature termination */ - g_free(name); - - if (found) { - *attributes = attribs; - } else { - *start = NULL; - *end = NULL; - *attributes = NULL; - } - - return found; -} - -struct purple_parse_tag { - char *src_tag; - char *dest_tag; - gboolean ignore; -}; - -/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method - recommended in the GCC docs). It contains 'continue's that should - affect the while-loop in purple_markup_html_to_xhtml and doing the - above would break that. - Also, remember to put braces in constructs that require them for - multiple statements when using this macro. */ -#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ - const char *o = c + strlen("<" x); \ - const char *p = NULL, *q = NULL, *r = NULL; \ - /* o = iterating over full tag \ - * p = > (end of tag) \ - * q = start of quoted bit \ - * r = < inside tag \ - */ \ - GString *innards = g_string_new(""); \ - while(o && *o) { \ - if(!q && (*o == '\"' || *o == '\'') ) { \ - q = o; \ - } else if(q) { \ - if(*o == *q) { /* end of quoted bit */ \ - char *unescaped = g_strndup(q+1, o-q-1); \ - char *escaped = g_markup_escape_text(unescaped, -1); \ - g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ - g_free(unescaped); \ - g_free(escaped); \ - q = NULL; \ - } else if(*c == '\\') { \ - o++; \ - } \ - } else if(*o == '<') { \ - r = o; \ - } else if(*o == '>') { \ - p = o; \ - break; \ - } else { \ - innards = g_string_append_c(innards, *o); \ - } \ - o++; \ - } \ - if(p && !r) { /* got an end of tag and no other < earlier */\ - if(*(p-1) != '/') { 
\ - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \ - pt->src_tag = x; \ - pt->dest_tag = y; \ - tags = g_list_prepend(tags, pt); \ - } \ - if(xhtml) { \ - xhtml = g_string_append(xhtml, "<" y); \ - xhtml = g_string_append(xhtml, innards->str); \ - xhtml = g_string_append_c(xhtml, '>'); \ - } \ - c = p + 1; \ - } else { /* got end of tag with earlier < *or* didn't get anything */ \ - if(xhtml) \ - xhtml = g_string_append(xhtml, "<"); \ - if(plain) \ - plain = g_string_append_c(plain, '<'); \ - c++; \ - } \ - g_string_free(innards, TRUE); \ - continue; \ - } \ - if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ - (*(c+strlen("<" x)) == '>' || \ - !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ - if(xhtml) \ - xhtml = g_string_append(xhtml, "<" y); \ - c += strlen("<" x); \ - if(*c != '/') { \ - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \ - pt->src_tag = x; \ - pt->dest_tag = y; \ - tags = g_list_prepend(tags, pt); \ - if(xhtml) \ - xhtml = g_string_append_c(xhtml, '>'); \ - } else { \ - if(xhtml) \ - xhtml = g_string_append(xhtml, "/>");\ - } \ - c = strchr(c, '>') + 1; \ - continue; \ - } -/* Don't forget to check the note above for ALLOW_TAG_ALT. 
*/ -#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) -void -purple_markup_html_to_xhtml(const char *html, char **xhtml_out, - char **plain_out) -{ - GString *xhtml = NULL; - GString *plain = NULL; - GString *url = NULL; - GString *cdata = NULL; - GList *tags = NULL, *tag; - const char *c = html; - char quote = '\0'; - -#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \ - quote = *(ptr++); \ - else \ - quote = '\0'; - -#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>'))) - - g_return_if_fail(xhtml_out != NULL || plain_out != NULL); - - if(xhtml_out) - xhtml = g_string_new(""); - if(plain_out) - plain = g_string_new(""); - - while(c && *c) { - if(*c == '<') { - if(*(c+1) == '/') { /* closing tag */ - tag = tags; - while(tag) { - struct purple_parse_tag *pt = tag->data; - if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { - c += strlen(pt->src_tag) + 3; - break; - } - tag = tag->next; - } - if(tag) { - while(tags) { - struct purple_parse_tag *pt = tags->data; - if(xhtml && !pt->ignore) - g_string_append_printf(xhtml, "</%s>", pt->dest_tag); - if(plain && purple_strequal(pt->src_tag, "a")) { - /* if this is a link, we have to add the url to the plaintext, too */ - if (cdata && url && - (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 || - g_utf8_collate(url->str + 7, cdata->str) != 0))) - g_string_append_printf(plain, " <%s>", g_strstrip(purple_unescape_html(url->str))); - if (cdata) { - g_string_free(cdata, TRUE); - cdata = NULL; - } - - } - if(tags == tag) - break; - tags = g_list_delete_link(tags, tags); - g_free(pt); - } - g_free(tag->data); - tags = g_list_delete_link(tags, tag); - } else { - /* a closing tag we weren't expecting... 
- * we'll let it slide, if it's really a tag...if it's - * just a </ we'll escape it properly */ - const char *end = c+2; - while(*end && g_ascii_isalpha(*end)) - end++; - if(*end == '>') { - c = end+1; - } else { - if(xhtml) - xhtml = g_string_append(xhtml, "<"); - if(plain) - plain = g_string_append_c(plain, '<'); - c++; - } - } - } else { /* opening tag */ - ALLOW_TAG("blockquote"); - ALLOW_TAG("cite"); - ALLOW_TAG("div"); - ALLOW_TAG("em"); - ALLOW_TAG("h1"); - ALLOW_TAG("h2"); - ALLOW_TAG("h3"); - ALLOW_TAG("h4"); - ALLOW_TAG("h5"); - ALLOW_TAG("h6"); - /* we only allow html to start the message */ - if(c == html) { - ALLOW_TAG("html"); - } - ALLOW_TAG_ALT("i", "em"); - ALLOW_TAG_ALT("italic", "em"); - ALLOW_TAG("li"); - ALLOW_TAG("ol"); - ALLOW_TAG("p"); - ALLOW_TAG("pre"); - ALLOW_TAG("q"); - ALLOW_TAG("span"); - ALLOW_TAG("ul"); - - - /* we skip <HR> because it's not legal in XHTML-IM. However, - * we still want to send something sensible, so we put a - * linebreak in its place. <BR> also needs special handling - * because putting a </BR> to close it would just be dumb. 
*/ - if((!g_ascii_strncasecmp(c, "<br", 3) - || !g_ascii_strncasecmp(c, "<hr", 3)) - && (*(c+3) == '>' || - !g_ascii_strncasecmp(c+3, "/>", 2) || - !g_ascii_strncasecmp(c+3, " />", 3))) { - c = strchr(c, '>') + 1; - if(xhtml) - xhtml = g_string_append(xhtml, "<br/>"); - if(plain && *c != '\n') - plain = g_string_append_c(plain, '\n'); - continue; - } - if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) { - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); - if (*(c+2) == '>') - pt->src_tag = "b"; - else if (*(c+2) == 'o') - pt->src_tag = "bold"; - else - pt->src_tag = "strong"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - if(xhtml) - xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>"); - continue; - } - if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); - pt->src_tag = *(c+2) == '>' ? "u" : "underline"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - if (xhtml) - xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); - continue; - } - if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); - pt->src_tag = *(c+2) == '>' ? 
"s" : "strike"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - if(xhtml) - xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); - continue; - } - if(!g_ascii_strncasecmp(c, "<sub>", 5)) { - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); - pt->src_tag = "sub"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - if(xhtml) - xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); - continue; - } - if(!g_ascii_strncasecmp(c, "<sup>", 5)) { - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); - pt->src_tag = "sup"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - if(xhtml) - xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); - continue; - } - if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) { - const char *p = c + 4; - GString *src = NULL, *alt = NULL; -#define ESCAPE(from, to) \ - CHECK_QUOTE(from); \ - while (VALID_CHAR(from)) { \ - int len; \ - if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \ - to = g_string_append(to, "&"); \ - else if (*from == '\'') \ - to = g_string_append(to, "'"); \ - else \ - to = g_string_append_c(to, *from); \ - from++; \ - } - - while (*p && *p != '>') { - if (!g_ascii_strncasecmp(p, "src=", 4)) { - const char *q = p + 4; - if (src) - g_string_free(src, TRUE); - src = g_string_new(""); - ESCAPE(q, src); - p = q; - } else if (!g_ascii_strncasecmp(p, "alt=", 4)) { - const char *q = p + 4; - if (alt) - g_string_free(alt, TRUE); - alt = g_string_new(""); - ESCAPE(q, alt); - p = q; - } else { - p++; - } - } -#undef ESCAPE - if ((c = strchr(p, '>')) != NULL) - c++; - else - c = p; - /* src and alt are required! */ - if(src && xhtml) - g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? 
alt->str : ""); - if(alt) { - if(plain) - plain = g_string_append(plain, purple_unescape_html(alt->str)); - if(!src && xhtml) - xhtml = g_string_append(xhtml, alt->str); - g_string_free(alt, TRUE); - } - g_string_free(src, TRUE); - continue; - } - if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) { - const char *p = c + 2; - struct purple_parse_tag *pt; - while (*p && *p != '>') { - if (!g_ascii_strncasecmp(p, "href=", 5)) { - const char *q = p + 5; - if (url) - g_string_free(url, TRUE); - url = g_string_new(""); - if (cdata) - g_string_free(cdata, TRUE); - cdata = g_string_new(""); - CHECK_QUOTE(q); - while (VALID_CHAR(q)) { - int len; - if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL)) - url = g_string_append(url, "&"); - else if (*q == '"') - url = g_string_append(url, """); - else - url = g_string_append_c(url, *q); - q++; - } - p = q; - } else { - p++; - } - } - if ((c = strchr(p, '>')) != NULL) - c++; - else - c = p; - pt = g_new0(struct purple_parse_tag, 1); - pt->src_tag = "a"; - pt->dest_tag = "a"; - tags = g_list_prepend(tags, pt); - if(xhtml) - g_string_append_printf(xhtml, "<a href=\"%s\">", url ? 
g_strstrip(url->str) : ""); - continue; - } -#define ESCAPE(from, to) \ - CHECK_QUOTE(from); \ - while (VALID_CHAR(from)) { \ - int len; \ - if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \ - to = g_string_append(to, "&"); \ - else if (*from == '\'') \ - to = g_string_append_c(to, '\"'); \ - else \ - to = g_string_append_c(to, *from); \ - from++; \ - } - if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { - const char *p = c + 5; - GString *style = g_string_new(""); - struct purple_parse_tag *pt; - while (*p && *p != '>') { - if (!g_ascii_strncasecmp(p, "back=", 5)) { - const char *q = p + 5; - GString *color = g_string_new(""); - ESCAPE(q, color); - g_string_append_printf(style, "background: %s; ", color->str); - g_string_free(color, TRUE); - p = q; - } else if (!g_ascii_strncasecmp(p, "color=", 6)) { - const char *q = p + 6; - GString *color = g_string_new(""); - ESCAPE(q, color); - g_string_append_printf(style, "color: %s; ", color->str); - g_string_free(color, TRUE); - p = q; - } else if (!g_ascii_strncasecmp(p, "face=", 5)) { - const char *q = p + 5; - GString *face = g_string_new(""); - ESCAPE(q, face); - g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str)); - g_string_free(face, TRUE); - p = q; - } else if (!g_ascii_strncasecmp(p, "size=", 5)) { - const char *q = p + 5; - int sz; - const char *size = "medium"; - CHECK_QUOTE(q); - sz = atoi(q); - switch (sz) - { - case 1: - size = "xx-small"; - break; - case 2: - size = "small"; - break; - case 3: - size = "medium"; - break; - case 4: - size = "large"; - break; - case 5: - size = "x-large"; - break; - case 6: - case 7: - size = "xx-large"; - break; - default: - break; - } - g_string_append_printf(style, "font-size: %s; ", size); - p = q; - } else { - p++; - } - } - if ((c = strchr(p, '>')) != NULL) - c++; - else - c = p; - pt = g_new0(struct purple_parse_tag, 1); - pt->src_tag = "font"; - pt->dest_tag = "span"; - tags = 
g_list_prepend(tags, pt); - if(style->len && xhtml) - g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str)); - else - pt->ignore = TRUE; - g_string_free(style, TRUE); - continue; - } -#undef ESCAPE - if (!g_ascii_strncasecmp(c, "<body ", 6)) { - const char *p = c + 6; - gboolean did_something = FALSE; - while (*p && *p != '>') { - if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) { - const char *q = p + 8; - struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); - GString *color = g_string_new(""); - CHECK_QUOTE(q); - while (VALID_CHAR(q)) { - color = g_string_append_c(color, *q); - q++; - } - if (xhtml) - g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str)); - g_string_free(color, TRUE); - if ((c = strchr(p, '>')) != NULL) - c++; - else - c = p; - pt->src_tag = "body"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - did_something = TRUE; - break; - } - p++; - } - if (did_something) continue; - } - /* this has to come after the special case for bgcolor */ - ALLOW_TAG("body"); - if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { - char *p = strstr(c + strlen("<!--"), "-->"); - if(p) { - if(xhtml) - xhtml = g_string_append(xhtml, "<!--"); - c += strlen("<!--"); - continue; - } - } - - if(xhtml) - xhtml = g_string_append(xhtml, "<"); - if(plain) - plain = g_string_append_c(plain, '<'); - c++; - } - } else if(*c == '&') { - char buf[7]; - const char *pln; - int len; - - if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) { - len = 1; - g_snprintf(buf, sizeof(buf), "%c", *c); - pln = buf; - } - if(xhtml) - xhtml = g_string_append_len(xhtml, c, len); - if(plain) - plain = g_string_append(plain, pln); - if(cdata) - cdata = g_string_append_len(cdata, c, len); - c += len; - } else { - if(xhtml) - xhtml = g_string_append_c(xhtml, *c); - if(plain) - plain = g_string_append_c(plain, *c); - if(cdata) - cdata = g_string_append_c(cdata, *c); - c++; - } - } - if(xhtml) { - for (tag = tags; tag ; 
tag = tag->next) { - struct purple_parse_tag *pt = tag->data; - if(!pt->ignore) - g_string_append_printf(xhtml, "</%s>", pt->dest_tag); - } - } - g_list_free(tags); - if(xhtml_out) - *xhtml_out = g_string_free(xhtml, FALSE); - if(plain_out) - *plain_out = g_string_free(plain, FALSE); - if(url) - g_string_free(url, TRUE); - if (cdata) - g_string_free(cdata, TRUE); -#undef CHECK_QUOTE -#undef VALID_CHAR -} - -/* The following are probably reasonable changes: - * - \n should be converted to a normal space - * - in addition to <br>, <p> and <div> etc. should also be converted into \n - * - We want to turn </td>#whitespace<td> sequences into a single tab - * - We want to turn </tr>#whitespace<tr> sequences into a single \n - * - <script>...</script> and <style>...</style> should be completely removed - */ - -char * -purple_markup_strip_html(const char *str) -{ - int i, j, k, entlen; - gboolean visible = TRUE; - gboolean closing_td_p = FALSE; - gchar *str2; - const gchar *cdata_close_tag = NULL, *ent; - gchar *href = NULL; - int href_st = 0; - - if(!str) - return NULL; - - str2 = g_strdup(str); - - for (i = 0, j = 0; str2[i]; i++) - { - if (str2[i] == '<') - { - if (cdata_close_tag) - { - /* Note: Don't even assume any other tag is a tag in CDATA */ - if (g_ascii_strncasecmp(str2 + i, cdata_close_tag, - strlen(cdata_close_tag)) == 0) - { - i += strlen(cdata_close_tag) - 1; - cdata_close_tag = NULL; - } - continue; - } - else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p) - { - str2[j++] = '\t'; - visible = TRUE; - } - else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0) - { - closing_td_p = TRUE; - visible = FALSE; - } - else - { - closing_td_p = FALSE; - visible = TRUE; - } - - k = i + 1; - - if(g_ascii_isspace(str2[k])) - visible = TRUE; - else if (str2[k]) - { - /* Scan until we end the tag either implicitly (closed start - * tag) or explicitly, using a sloppy method (i.e., < or > - * inside quoted attributes will screw us up) - */ - while 
(str2[k] && str2[k] != '<' && str2[k] != '>') - { - k++; - } - - /* If we've got an <a> tag with an href, save the address - * to print later. */ - if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 && - g_ascii_isspace(str2[i+2])) - { - int st; /* start of href, inclusive [ */ - int end; /* end of href, exclusive ) */ - char delim = ' '; - /* Find start of href */ - for (st = i + 3; st < k; st++) - { - if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0) - { - st += 5; - if (str2[st] == '"' || str2[st] == '\'') - { - delim = str2[st]; - st++; - } - break; - } - } - /* find end of address */ - for (end = st; end < k && str2[end] != delim; end++) - { - /* All the work is done in the loop construct above. */ - } - - /* If there's an address, save it. If there was - * already one saved, kill it. */ - if (st < k) - { - char *tmp; - g_free(href); - tmp = g_strndup(str2 + st, end - st); - href = purple_unescape_html(tmp); - g_free(tmp); - href_st = j; - } - } - - /* Replace </a> with an ascii representation of the - * address the link was pointing to. */ - else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0) - { - size_t hrlen = strlen(href); - - /* Only insert the href if it's different from the CDATA. 
*/ - if ((hrlen != (gsize)(j - href_st) || - strncmp(str2 + href_st, href, hrlen)) && - (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */ - strncmp(str2 + href_st, href + 7, hrlen - 7))) - { - str2[j++] = ' '; - str2[j++] = '('; - memmove(str2 + j, href, hrlen); - j += hrlen; - str2[j++] = ')'; - g_free(href); - href = NULL; - } - } - - /* Check for tags which should be mapped to newline (but ignore some of - * the tags at the beginning of the text) */ - else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0 - || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0 - || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0 - || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0 - || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0)) - || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0 - || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0) - { - str2[j++] = '\n'; - } - /* Check for tags which begin CDATA and need to be closed */ - else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0) - { - cdata_close_tag = "</script>"; - } - else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0) - { - cdata_close_tag = "</style>"; - } - /* Update the index and continue checking after the tag */ - i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k; - continue; - } - } - else if (cdata_close_tag) - { - continue; - } - else if (!g_ascii_isspace(str2[i])) - { - visible = TRUE; - } - - if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL) - { - while (*ent) - str2[j++] = *ent++; - i += entlen - 1; - continue; - } - - if (visible) - str2[j++] = g_ascii_isspace(str2[i])? 
' ': str2[i]; - } - - g_free(href); - - str2[j] = '\0'; - - return str2; -} - -static gboolean -badchar(char c) -{ - switch (c) { - case ' ': - case ',': - case '\0': - case '\n': - case '\r': - case '<': - case '>': - case '"': - return TRUE; - default: - return FALSE; - } -} - -static gboolean -badentity(const char *c) -{ - if (!g_ascii_strncasecmp(c, "<", 4) || - !g_ascii_strncasecmp(c, ">", 4) || - !g_ascii_strncasecmp(c, """, 6)) { - return TRUE; - } - return FALSE; -} - -static const char * -process_link(GString *ret, - const char *start, const char *c, - int matchlen, - const char *urlprefix, - int inside_paren) -{ - char *url_buf, *tmpurlbuf; - const char *t; - - for (t = c;; t++) { - if (!badchar(*t) && !badentity(t)) - continue; - - if (t - c == matchlen) - break; - - if (*t == ',' && *(t + 1) != ' ') { - continue; - } - - if (t > start && *(t - 1) == '.') - t--; - if (t > start && *(t - 1) == ')' && inside_paren > 0) - t--; - - url_buf = g_strndup(c, t - c); - tmpurlbuf = purple_unescape_html(url_buf); - g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>", - urlprefix, - tmpurlbuf, url_buf); - g_free(tmpurlbuf); - g_free(url_buf); - return t; - } - - return c; -} - -char * -purple_markup_linkify(const char *text) -{ - const char *c, *t, *q = NULL; - char *tmpurlbuf, *url_buf; - gunichar g; - gboolean inside_html = FALSE; - int inside_paren = 0; - GString *ret; - - if (text == NULL) - return NULL; - - ret = g_string_new(""); - - c = text; - while (*c) { - - if(*c == '(' && !inside_html) { - inside_paren++; - ret = g_string_append_c(ret, *c); - c++; - } - - if(inside_html) { - if(*c == '>') { - inside_html = FALSE; - } else if(!q && (*c == '\"' || *c == '\'')) { - q = c; - } else if(q) { - if(*c == *q) - q = NULL; - } - } else if(*c == '<') { - inside_html = TRUE; - if (!g_ascii_strncasecmp(c, "<A", 2)) { - while (1) { - if (!g_ascii_strncasecmp(c, "/A>", 3)) { - inside_html = FALSE; - break; - } - ret = g_string_append_c(ret, *c); - c++; - if (!(*c)) - 
break; - } - } - } else if (!g_ascii_strncasecmp(c, "http://", 7)) { - c = process_link(ret, text, c, 7, "", inside_paren); - } else if (!g_ascii_strncasecmp(c, "https://", 8)) { - c = process_link(ret, text, c, 8, "", inside_paren); - } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) { - c = process_link(ret, text, c, 6, "", inside_paren); - } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) { - c = process_link(ret, text, c, 7, "", inside_paren); - } else if (!g_ascii_strncasecmp(c, "file://", 7)) { - c = process_link(ret, text, c, 7, "", inside_paren); - } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) { - c = process_link(ret, text, c, 4, "http://", inside_paren); - } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) { - c = process_link(ret, text, c, 4, "ftp://", inside_paren); - } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) { - c = process_link(ret, text, c, 5, "", inside_paren); - } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) { - t = c; - while (1) { - if (badchar(*t) || badentity(t)) { - char *d; - if (t - c == 7) { - break; - } - if (t > text && *(t - 1) == '.') - t--; - if ((d = strstr(c + 7, "?")) != NULL && d < t) - url_buf = g_strndup(c + 7, d - c - 7); - else - url_buf = g_strndup(c + 7, t - c - 7); - if (!purple_email_is_valid(url_buf)) { - g_free(url_buf); - break; - } - g_free(url_buf); - url_buf = g_strndup(c, t - c); - tmpurlbuf = purple_unescape_html(url_buf); - g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", - tmpurlbuf, url_buf); - g_free(url_buf); - g_free(tmpurlbuf); - c = t; - break; - } - t++; - } - } else if (c != text && (*c == '@')) { - int flag; - GString *gurl_buf = NULL; - const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; - - if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1))) - flag = 0; - else { - flag = 1; - gurl_buf 
= g_string_new(""); - } - - t = c; - while (flag) { - /* iterate backwards grabbing the local part of an email address */ - g = g_utf8_get_char(t); - if (badchar(*t) || (g >= 127) || (*t == '(') || - ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) || - !g_ascii_strncasecmp(t - 3, "&gt;", 4))) || - (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) { - /* local part will already be part of ret, strip it out */ - ret = g_string_truncate(ret, ret->len - (c - t)); - ret = g_string_append_unichar(ret, g); - break; - } else { - g_string_prepend_unichar(gurl_buf, g); - t = g_utf8_find_prev_char(text, t); - if (t < text) { - ret = g_string_assign(ret, ""); - break; - } - } - } - - t = g_utf8_find_next_char(c, NULL); - - while (flag) { - /* iterate forwards grabbing the domain part of an email address */ - g = g_utf8_get_char(t); - if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) { - char *d; - - url_buf = g_string_free(gurl_buf, FALSE); - gurl_buf = NULL; - - /* strip off trailing periods */ - if (*url_buf) { - for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) - *d = '\0'; - } - - tmpurlbuf = purple_unescape_html(url_buf); - if (purple_email_is_valid(tmpurlbuf)) { - g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>", - tmpurlbuf, url_buf); - } else { - g_string_append(ret, url_buf); - } - g_free(url_buf); - g_free(tmpurlbuf); - c = t; - - break; - } else { - g_string_append_unichar(gurl_buf, g); - t = g_utf8_find_next_char(t, NULL); - } - } - - if (gurl_buf) { - g_string_free(gurl_buf, TRUE); - } - } - - if(*c == ')' && !inside_html) { - inside_paren--; - ret = g_string_append_c(ret, *c); - c++; - } - - if (*c == 0) - break; - - ret = g_string_append_c(ret, *c); - c++; - - } - return g_string_free(ret, FALSE); -} - -char *purple_unescape_text(const char *in) -{ - GString *ret; - const char *c = in; - - if (in == NULL) - return NULL; - - ret = g_string_new(""); - while (*c) { - int len; - const char *ent; - - if 
((ent = purple_markup_unescape_entity(c, &len)) != NULL) { - g_string_append(ret, ent); - c += len; - } else { - g_string_append_c(ret, *c); - c++; - } - } - - return g_string_free(ret, FALSE); -} - -char *purple_unescape_html(const char *html) -{ - GString *ret; - const char *c = html; - - if (html == NULL) - return NULL; - - ret = g_string_new(""); - while (*c) { - int len; - const char *ent; - - if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) { - g_string_append(ret, ent); - c += len; - } else if (!strncmp(c, "<br>", 4)) { - g_string_append_c(ret, '\n'); - c += 4; - } else { - g_string_append_c(ret, *c); - c++; - } - } - - return g_string_free(ret, FALSE); -} - -char * -purple_markup_slice(const char *str, guint x, guint y) -{ - GString *ret; - GQueue *q; - guint z = 0; - gboolean appended = FALSE; - gunichar c; - char *tag; - - g_return_val_if_fail(str != NULL, NULL); - g_return_val_if_fail(x <= y, NULL); - - if (x == y) - return g_strdup(""); - - ret = g_string_new(""); - q = g_queue_new(); - - while (*str && (z < y)) { - c = g_utf8_get_char(str); - - if (c == '<') { - char *end = strchr(str, '>'); - - if (!end) { - g_string_free(ret, TRUE); - while ((tag = g_queue_pop_head(q))) - g_free(tag); - g_queue_free(q); - return NULL; - } - - if (!g_ascii_strncasecmp(str, "<img ", 5)) { - z += strlen("[Image]"); - } else if (!g_ascii_strncasecmp(str, "<br", 3)) { - z += 1; - } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) { - z += strlen("\n---\n"); - } else if (!g_ascii_strncasecmp(str, "</", 2)) { - /* pop stack */ - char *tmp; - - tmp = g_queue_pop_head(q); - g_free(tmp); - /* z += 0; */ - } else { - /* push it unto the stack */ - char *tmp; - - tmp = g_strndup(str, end - str + 1); - g_queue_push_head(q, tmp); - /* z += 0; */ - } - - if (z >= x) { - g_string_append_len(ret, str, end - str + 1); - } - - str = end; - } else if (c == '&') { - char *end = strchr(str, ';'); - if (!end) { - g_string_free(ret, TRUE); - while ((tag = g_queue_pop_head(q))) - 
g_free(tag); - g_queue_free(q); - - return NULL; - } - - if (z >= x) - g_string_append_len(ret, str, end - str + 1); - - z++; - str = end; - } else { - if (z == x && z > 0 && !appended) { - GList *l = q->tail; - - while (l) { - tag = l->data; - g_string_append(ret, tag); - l = l->prev; - } - appended = TRUE; - } - - if (z >= x) - g_string_append_unichar(ret, c); - z++; - } - - str = g_utf8_next_char(str); - } - - while ((tag = g_queue_pop_head(q))) { - char *name; - - name = purple_markup_get_tag_name(tag); - g_string_append_printf(ret, "</%s>", name); - g_free(name); - g_free(tag); - } - - g_queue_free(q); - return g_string_free(ret, FALSE); -} - -char * -purple_markup_get_tag_name(const char *tag) -{ - int i; - g_return_val_if_fail(tag != NULL, NULL); - g_return_val_if_fail(*tag == '<', NULL); - - for (i = 1; tag[i]; i++) - if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/') - break; - - return g_strndup(tag+1, i-1); -} - -/************************************************************************** * Path/Filename Functions **************************************************************************/ const char * |