summaryrefslogtreecommitdiff
path: root/libpurple/util.c
diff options
context:
space:
mode:
author Gary Kramlich <grim@reaperworld.com> 2020-10-16 02:27:21 -0500
committer Gary Kramlich <grim@reaperworld.com> 2020-10-16 02:27:21 -0500
commit 4dd57d497e1b638d808ab6548329859f150f7e6e (patch)
tree 17ecb67ca0a05296edbd1ddb07c68ca2b908c8d1 /libpurple/util.c
parent 22ca01c264a671b0060469a7f6e9251a3a3447d7 (diff)
download pidgin-4dd57d497e1b638d808ab6548329859f150f7e6e.tar.gz
Pull the purple_markup_* API out of util.[ch] into purplemarkup.[ch]. No code was changed; it was only moved from one file to the other.
Testing Done: Compile and unit tests. Reviewed at https://reviews.imfreedom.org/r/171/
Diffstat (limited to 'libpurple/util.c')
-rw-r--r-- libpurple/util.c 1540
1 files changed, 0 insertions, 1540 deletions
diff --git a/libpurple/util.c b/libpurple/util.c
index ae4702935c..12386ecd54 100644
--- a/libpurple/util.c
+++ b/libpurple/util.c
@@ -449,1546 +449,6 @@ gint purple_time_parse_month(const char *month_abbr)
}
/**************************************************************************
- * Markup Functions
- **************************************************************************/
-
-/*
- * Derived from the text escaper in glib's gmarkup.c, with one deliberate
- * difference: an apostrophe is copied through unchanged instead of being
- * rewritten as &apos;
- *
- * Walks the first `length` bytes of `text` one UTF-8 character at a time and
- * appends the escaped form to `str`.
- */
-static void append_escaped_text(GString *str,
-		const gchar *text, gssize length)
-{
-	const gchar *cursor = text;
-	const gchar *const stop = text + length;
-
-	while (cursor != stop)
-	{
-		const gchar *const following = g_utf8_next_char (cursor);
-
-		if (*cursor == '&') {
-			g_string_append (str, "&amp;");
-		} else if (*cursor == '<') {
-			g_string_append (str, "&lt;");
-		} else if (*cursor == '>') {
-			g_string_append (str, "&gt;");
-		} else if (*cursor == '"') {
-			g_string_append (str, "&quot;");
-		} else {
-			const gunichar code = g_utf8_get_char (cursor);
-			/* control characters (everything in the C0/C1 ranges except
-			 * tab, newline, carriage return and U+0085) are written as
-			 * numeric character references */
-			const gboolean needs_char_ref =
-				(0x1 <= code && code <= 0x8) ||
-				(0xb <= code && code <= 0xc) ||
-				(0xe <= code && code <= 0x1f) ||
-				(0x7f <= code && code <= 0x84) ||
-				(0x86 <= code && code <= 0x9f);
-
-			if (needs_char_ref)
-				g_string_append_printf (str, "&#x%x;", code);
-			else
-				g_string_append_len (str, cursor, following - cursor);
-		}
-
-		cursor = following;
-	}
-}
-
-/*
- * Modelled on the escaping routine in glib's gmarkup.c.
- *
- * Returns a newly allocated copy of `text` with &, <, > and " replaced by
- * entities (see append_escaped_text). A negative `length` means `text` is
- * NUL-terminated and its full length is used. Returns NULL when `text` is
- * NULL.
- */
-gchar *purple_markup_escape_text(const gchar *text, gssize length)
-{
-	GString *escaped;
-	gssize nbytes = length;
-
-	g_return_val_if_fail(text != NULL, NULL);
-
-	if (nbytes < 0)
-		nbytes = strlen(text);
-
-	/* escaping never shrinks the text, so reserve at least the input size */
-	escaped = g_string_sized_new(nbytes);
-	append_escaped_text(escaped, text, nbytes);
-
-	return g_string_free(escaped, FALSE);
-}
-
-/*
- * Decodes the single HTML entity that `text` begins with.
- *
- * text:   candidate entity; must start with '&' to be considered.
- * length: if non-NULL, receives the number of input bytes the entity
- *         occupies (including the trailing ';'); untouched on failure.
- *
- * Returns the UTF-8 replacement text, or NULL if `text` does not start with
- * a recognised named entity or a non-zero numeric reference. The result is
- * either a string literal or a function-local static buffer, so it must not
- * be freed and, for numeric references, is overwritten by the next call.
- */
-const char *
-purple_markup_unescape_entity(const char *text, int *length)
-{
-	const char *pln;
-	int len;
-
-	if (!text || *text != '&')
-		return NULL;
-
-#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
-
-	if (IS_ENTITY("&amp;")) {
-		pln = "&";
-	} else if (IS_ENTITY("&lt;")) {
-		pln = "<";
-	} else if (IS_ENTITY("&gt;")) {
-		pln = ">";
-	} else if (IS_ENTITY("&nbsp;")) {
-		pln = " ";
-	} else if (IS_ENTITY("&copy;")) {
-		pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */
-	} else if (IS_ENTITY("&quot;")) {
-		pln = "\"";
-	} else if (IS_ENTITY("&reg;")) {
-		pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */
-	} else if (IS_ENTITY("&apos;")) {
-		pln = "\'";
-	} else if (text[1] != '#' || !(g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
-		/* neither a known name nor a numeric reference */
-		return NULL;
-	} else {
-		/* numeric reference: &#NNN; (decimal) or &#xHHH; (hex) */
-		static char buf[7];
-		const char *digits = text + 2;
-		char *tail;
-		guint64 codepoint;
-		int radix = 10;
-		int written;
-
-		if (*digits == 'x') {
-			radix = 16;
-			digits++;
-		}
-
-		codepoint = g_ascii_strtoull(digits, &tail, radix);
-		if (codepoint == 0 || codepoint > INT_MAX || *tail != ';')
-			return NULL;
-
-		len = (tail - text) + 1;
-
-		written = g_unichar_to_utf8((gunichar)codepoint, buf);
-		buf[written] = '\0';
-		pln = buf;
-	}
-
-	if (length)
-		*length = len;
-	return pln;
-}
-
-/*
- * Extracts the value of one property from a CSS declaration list such as the
- * contents of a style="..." attribute.
- *
- * style: the declaration list ("name: value; name: value"); may be NULL.
- * opt:   the property name to look for, compared case-insensitively.
- *
- * Returns a newly allocated string holding the value with surrounding
- * whitespace removed and passed through purple_unescape_html(), or NULL when
- * the property is absent, has an empty value, or the list is malformed.
- */
-char *
-purple_markup_get_css_property(const gchar *style,
-		const gchar *opt)
-{
-	const gchar *css_str = style;
-	const gchar *css_value_start;
-	const gchar *css_value_end;
-	size_t optlen;
-	gchar *tmp;
-	gchar *ret;
-
-	g_return_val_if_fail(opt != NULL, NULL);
-
-	if (!css_str)
-		return NULL;
-
-	optlen = strlen(opt);
-
-	/* find the CSS property */
-	while (1)
-	{
-		/* skip whitespace characters */
-		while (*css_str && g_ascii_isspace(*css_str))
-			css_str++;
-		if (!g_ascii_isalpha(*css_str))
-			return NULL;
-
-		if (!g_ascii_strncasecmp(css_str, opt, optlen))
-		{
-			/* A prefix match is not enough: "font" must not stop the
-			 * search at "font-size". Only accept the property when the
-			 * name is followed (after optional whitespace) by ':'. */
-			const gchar *after_name = css_str + optlen;
-
-			while (*after_name && g_ascii_isspace(*after_name))
-				after_name++;
-			if (*after_name == ':')
-			{
-				css_str = after_name + 1;
-				break;
-			}
-		}
-
-		/* go to next css property positioned after the next ';' */
-		while (*css_str && *css_str != '"' && *css_str != ';')
-			css_str++;
-		if (*css_str != ';')
-			return NULL;
-		css_str++;
-	}
-
-	/* css_str now points just past the ':'; find the start of the value */
-	while (*css_str && g_ascii_isspace(*css_str))
-		css_str++;
-	if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
-		return NULL;
-
-	/* mark the CSS value */
-	css_value_start = css_str;
-	while (*css_str && *css_str != '"' && *css_str != ';')
-		css_str++;
-	css_value_end = css_str - 1;
-
-	/* Removes trailing whitespace */
-	while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
-		css_value_end--;
-
-	tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
-	ret = purple_unescape_html(tmp);
-	g_free(tmp);
-
-	return ret;
-}
-
-/*
- * Reports whether the first <span> found in `html` requests right-to-left
- * text, either through dir="rtl" or through a "direction: rtl" entry in its
- * style attribute (both compared case-insensitively).
- */
-gboolean purple_markup_is_rtl(const char *html)
-{
-	GData *attributes;
-	const gchar *start, *end;
-	const char *value;
-	gboolean rtl;
-
-	if (!purple_markup_find_tag("span", html, &start, &end, &attributes))
-		return FALSE;
-
-	/* values read from the datalist belong to it and are released by the
-	 * g_datalist_clear() call below */
-	value = g_datalist_get_data(&attributes, "dir");
-	rtl = (value != NULL && g_ascii_strcasecmp(value, "RTL") == 0);
-
-	if (!rtl)
-	{
-		value = g_datalist_get_data(&attributes, "style");
-		if (value != NULL)
-		{
-			char *direction = purple_markup_get_css_property(value, "direction");
-
-			rtl = (direction != NULL && g_ascii_strcasecmp(direction, "RTL") == 0);
-			g_free(direction);
-		}
-	}
-
-	g_datalist_clear(&attributes);
-	return rtl;
-}
-
-/*
- * Finds the first occurrence of the tag `needle` in `haystack` and collects
- * its attributes.
- *
- * needle:     tag name without angle brackets; matched case-insensitively.
- * haystack:   markup to search.
- * start:      receives a pointer to the tag's '<'.
- * end:        receives a pointer to the tag's closing '>'.
- * attributes: receives a datalist mapping lower-cased attribute names to
- *             newly allocated values; the caller releases it with
- *             g_datalist_clear().
- *
- * Returns TRUE when a complete tag was found. Otherwise returns FALSE and
- * sets *start, *end and *attributes to NULL.
- */
-gboolean
-purple_markup_find_tag(const char *needle, const char *haystack,
- const char **start, const char **end, GData **attributes)
-{
- GData *attribs;
- const char *cur = haystack;
- char *name = NULL;
- gboolean found = FALSE;
- gboolean in_tag = FALSE;
- gboolean in_attr = FALSE;
- const char *in_quotes = NULL;
- size_t needlelen;
-
- g_return_val_if_fail( needle != NULL, FALSE);
- g_return_val_if_fail( *needle != '\0', FALSE);
- g_return_val_if_fail( haystack != NULL, FALSE);
- g_return_val_if_fail( start != NULL, FALSE);
- g_return_val_if_fail( end != NULL, FALSE);
- g_return_val_if_fail(attributes != NULL, FALSE);
-
- needlelen = strlen(needle);
- g_datalist_init(&attribs);
-
- while (*cur && !found) {
- if (in_tag) {
- if (in_quotes) {
- const char *close = cur;
-
- while (*close && *close != *in_quotes)
- close++;
-
- /* if we got the close quote, store the value and carry on from *
- * after it. if we ran to the end of the string, point to the NULL *
- * and we're outta here */
- if (*close) {
- /* only store a value if we have an attribute name */
- if (name) {
- size_t len = close - cur;
- char *val = g_strndup(cur, len);
-
- g_datalist_set_data_full(&attribs, name, val, g_free);
- g_free(name);
- name = NULL;
- }
-
- in_quotes = NULL;
- cur = close + 1;
- } else {
- cur = close;
- }
- } else if (in_attr) {
- const char *close = cur;
-
- while (*close && *close != '>' && *close != '"' &&
- *close != '\'' && *close != ' ' && *close != '=')
- close++;
-
- /* if we got the equals, store the name of the attribute. if we got
- * the quote, save the attribute and go straight to quote mode.
- * otherwise the tag closed or we reached the end of the string,
- * so we can get outta here */
- switch (*close) {
- case '"':
- case '\'':
- in_quotes = close;
- /* fall through */
- case '=':
- {
- size_t len = close - cur;
-
- /* don't store a blank attribute name */
- if (len) {
- g_free(name);
- name = g_ascii_strdown(cur, len);
- }
-
- in_attr = FALSE;
- cur = close + 1;
- }
- break;
- case ' ':
- case '>':
- in_attr = FALSE;
- /* fall through */
- default:
- cur = close;
- break;
- }
- } else {
- switch (*cur) {
- case ' ':
- /* swallow extra spaces inside tag */
- while (*cur && *cur == ' ') cur++;
- in_attr = TRUE;
- break;
- case '>':
- found = TRUE;
- *end = cur;
- break;
- case '"':
- case '\'':
- in_quotes = cur;
- /* fall through */
- default:
- cur++;
- break;
- }
- }
- } else {
- /* if we hit a < followed by the name of our tag... */
- if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
- *start = cur;
- cur = cur + needlelen + 1;
-
- /* if we're pointing at a space or a >, we found the right tag. if *
- * we're not, we've found a longer tag, so we need to skip to the *
- * >, but not being distracted by >s inside quotes. */
- if (*cur == ' ' || *cur == '>') {
- in_tag = TRUE;
- } else {
- /* The loop must not stop at a quote, otherwise the quoted
- * value is rescanned by the outer loop and a "<needle" inside
- * it is mistaken for a real tag. */
- while (*cur && *cur != '>') {
- if (*cur == '"' || *cur == '\'') {
- /* skip the quoted value, including its closing quote */
- const char quote = *cur++;
- while (*cur && *cur != quote)
- cur++;
- if (*cur)
- cur++;
- } else {
- cur++;
- }
- }
- }
- } else {
- cur++;
- }
- }
- }
-
- /* clean up any attribute name from a premature termination */
- g_free(name);
-
- if (found) {
- *attributes = attribs;
- } else {
- /* attributes may have been collected before the input ran out;
- * release them since the caller is not handed the list */
- g_datalist_clear(&attribs);
- *start = NULL;
- *end = NULL;
- *attributes = NULL;
- }
-
- return found;
-}
-
-struct purple_parse_tag {
- char *src_tag; /* tag name as written in the input; closing tags are matched against it */
- char *dest_tag; /* tag name written to the XHTML output when the tag is closed */
- gboolean ignore; /* when set, no closing tag is emitted for this entry */
-};
-
-/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method
- recommended in the GCC docs). It contains 'continue's that should
- affect the while-loop in purple_markup_html_to_xhtml and doing the
- above would break that.
- Also, remember to put braces in constructs that require them for
- multiple statements when using this macro.
-
- ALLOW_TAG_ALT(x, y) recognises an opening tag named x (string literal) at
- the current input position and emits it as tag y. It relies on these names
- from the enclosing scope: `c` (input cursor), `xhtml` and `plain` (output
- GStrings, either may be NULL) and `tags` (list of open tags). The first
- `if` handles a tag with attributes, re-escaping each quoted value; the
- second handles the bare forms <x> and <x/>. A tag that is not
- self-closing is pushed onto `tags`.
-
- NOTE(review): inside a quoted value the backslash test reads `*c`, which
- is '<' at that point, so that branch looks unreachable; `*o` was
- presumably intended. Confirm before changing, since enabling it alters
- how quoted values are scanned. */
-#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
- const char *o = c + strlen("<" x); \
- const char *p = NULL, *q = NULL, *r = NULL; \
- /* o = iterating over full tag \
- * p = > (end of tag) \
- * q = start of quoted bit \
- * r = < inside tag \
- */ \
- GString *innards = g_string_new(""); \
- while(o && *o) { \
- if(!q && (*o == '\"' || *o == '\'') ) { \
- q = o; \
- } else if(q) { \
- if(*o == *q) { /* end of quoted bit */ \
- char *unescaped = g_strndup(q+1, o-q-1); \
- char *escaped = g_markup_escape_text(unescaped, -1); \
- g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
- g_free(unescaped); \
- g_free(escaped); \
- q = NULL; \
- } else if(*c == '\\') { \
- o++; \
- } \
- } else if(*o == '<') { \
- r = o; \
- } else if(*o == '>') { \
- p = o; \
- break; \
- } else { \
- innards = g_string_append_c(innards, *o); \
- } \
- o++; \
- } \
- if(p && !r) { /* got an end of tag and no other < earlier */\
- if(*(p-1) != '/') { \
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
- pt->src_tag = x; \
- pt->dest_tag = y; \
- tags = g_list_prepend(tags, pt); \
- } \
- if(xhtml) { \
- xhtml = g_string_append(xhtml, "<" y); \
- xhtml = g_string_append(xhtml, innards->str); \
- xhtml = g_string_append_c(xhtml, '>'); \
- } \
- c = p + 1; \
- } else { /* got end of tag with earlier < *or* didn't get anything */ \
- if(xhtml) \
- xhtml = g_string_append(xhtml, "&lt;"); \
- if(plain) \
- plain = g_string_append_c(plain, '<'); \
- c++; \
- } \
- g_string_free(innards, TRUE); \
- continue; \
- } \
- if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
- (*(c+strlen("<" x)) == '>' || \
- !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
- if(xhtml) \
- xhtml = g_string_append(xhtml, "<" y); \
- c += strlen("<" x); \
- if(*c != '/') { \
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
- pt->src_tag = x; \
- pt->dest_tag = y; \
- tags = g_list_prepend(tags, pt); \
- if(xhtml) \
- xhtml = g_string_append_c(xhtml, '>'); \
- } else { \
- if(xhtml) \
- xhtml = g_string_append(xhtml, "/>");\
- } \
- c = strchr(c, '>') + 1; \
- continue; \
- }
-/* Don't forget to check the note above for ALLOW_TAG_ALT. */
-#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
-/*
- * Converts the loosely formed HTML in `html` to XHTML and/or plain text.
- *
- * html:      input markup; NULL or "" yields empty output strings.
- * xhtml_out: if non-NULL, receives a newly allocated XHTML rendering.
- * plain_out: if non-NULL, receives a newly allocated plain-text rendering.
- * At least one of the two output pointers must be non-NULL.
- *
- * Only the tags handled below are passed through (several are rewritten as
- * styled <span>s); any other '<' is written as &lt; in the XHTML output.
- * Tags still open at the end of the input are closed in the XHTML output.
- */
-void
-purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
- char **plain_out)
-{
- GString *xhtml = NULL;
- GString *plain = NULL;
- GString *url = NULL;
- GString *cdata = NULL;
- GList *tags = NULL, *tag;
- const char *c = html;
- char quote = '\0';
-
-#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
- quote = *(ptr++); \
- else \
- quote = '\0';
-
-#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
-
- g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
-
- if(xhtml_out)
- xhtml = g_string_new("");
- if(plain_out)
- plain = g_string_new("");
-
- while(c && *c) {
- if(*c == '<') {
- if(*(c+1) == '/') { /* closing tag */
- tag = tags;
- while(tag) {
- struct purple_parse_tag *pt = tag->data;
- if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
- c += strlen(pt->src_tag) + 3;
- break;
- }
- tag = tag->next;
- }
- if(tag) {
- while(tags) {
- struct purple_parse_tag *pt = tags->data;
- if(xhtml && !pt->ignore)
- g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
- if(plain && purple_strequal(pt->src_tag, "a")) {
- /* if this is a link, we have to add the url to the plaintext, too */
- if (cdata && url &&
- (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
- g_utf8_collate(url->str + 7, cdata->str) != 0))) {
- /* purple_unescape_html() returns a new string; free it
- * once it has been copied into the output */
- char *unescaped = purple_unescape_html(url->str);
- g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
- g_free(unescaped);
- }
- if (cdata) {
- g_string_free(cdata, TRUE);
- cdata = NULL;
- }
-
- }
- if(tags == tag)
- break;
- tags = g_list_delete_link(tags, tags);
- g_free(pt);
- }
- g_free(tag->data);
- tags = g_list_delete_link(tags, tag);
- } else {
- /* a closing tag we weren't expecting...
- * we'll let it slide, if it's really a tag...if it's
- * just a </ we'll escape it properly */
- const char *end = c+2;
- while(*end && g_ascii_isalpha(*end))
- end++;
- if(*end == '>') {
- c = end+1;
- } else {
- if(xhtml)
- xhtml = g_string_append(xhtml, "&lt;");
- if(plain)
- plain = g_string_append_c(plain, '<');
- c++;
- }
- }
- } else { /* opening tag */
- ALLOW_TAG("blockquote");
- ALLOW_TAG("cite");
- ALLOW_TAG("div");
- ALLOW_TAG("em");
- ALLOW_TAG("h1");
- ALLOW_TAG("h2");
- ALLOW_TAG("h3");
- ALLOW_TAG("h4");
- ALLOW_TAG("h5");
- ALLOW_TAG("h6");
- /* we only allow html to start the message */
- if(c == html) {
- ALLOW_TAG("html");
- }
- ALLOW_TAG_ALT("i", "em");
- ALLOW_TAG_ALT("italic", "em");
- ALLOW_TAG("li");
- ALLOW_TAG("ol");
- ALLOW_TAG("p");
- ALLOW_TAG("pre");
- ALLOW_TAG("q");
- ALLOW_TAG("span");
- ALLOW_TAG("ul");
-
-
- /* we skip <HR> because it's not legal in XHTML-IM. However,
- * we still want to send something sensible, so we put a
- * linebreak in its place. <BR> also needs special handling
- * because putting a </BR> to close it would just be dumb. */
- if((!g_ascii_strncasecmp(c, "<br", 3)
- || !g_ascii_strncasecmp(c, "<hr", 3))
- && (*(c+3) == '>' ||
- !g_ascii_strncasecmp(c+3, "/>", 2) ||
- !g_ascii_strncasecmp(c+3, " />", 3))) {
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<br/>");
- if(plain && *c != '\n')
- plain = g_string_append_c(plain, '\n');
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- if (*(c+2) == '>')
- pt->src_tag = "b";
- else if (*(c+2) == 'o')
- pt->src_tag = "bold";
- else
- pt->src_tag = "strong";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = *(c+2) == '>' ? "u" : "underline";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if (xhtml)
- xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = *(c+2) == '>' ? "s" : "strike";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "sub";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "sup";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
- continue;
- }
- if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
- const char *p = c + 4;
- GString *src = NULL, *alt = NULL;
-#define ESCAPE(from, to) \
- CHECK_QUOTE(from); \
- while (VALID_CHAR(from)) { \
- int len; \
- if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
- to = g_string_append(to, "&amp;"); \
- else if (*from == '\'') \
- to = g_string_append(to, "&apos;"); \
- else \
- to = g_string_append_c(to, *from); \
- from++; \
- }
-
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "src=", 4)) {
- const char *q = p + 4;
- if (src)
- g_string_free(src, TRUE);
- src = g_string_new("");
- ESCAPE(q, src);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
- const char *q = p + 4;
- if (alt)
- g_string_free(alt, TRUE);
- alt = g_string_new("");
- ESCAPE(q, alt);
- p = q;
- } else {
- p++;
- }
- }
-#undef ESCAPE
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- /* src and alt are required! */
- if(src && xhtml)
- g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
- if(alt) {
- if(plain) {
- /* the unescaped copy is ours to free after appending */
- char *unescaped = purple_unescape_html(alt->str);
- plain = g_string_append(plain, unescaped);
- g_free(unescaped);
- }
- if(!src && xhtml)
- xhtml = g_string_append(xhtml, alt->str);
- g_string_free(alt, TRUE);
- }
- /* an <img> without src= leaves src NULL, which g_string_free rejects */
- if(src)
- g_string_free(src, TRUE);
- continue;
- }
- if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
- const char *p = c + 2;
- struct purple_parse_tag *pt;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "href=", 5)) {
- const char *q = p + 5;
- if (url)
- g_string_free(url, TRUE);
- url = g_string_new("");
- if (cdata)
- g_string_free(cdata, TRUE);
- cdata = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- int len;
- if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
- url = g_string_append(url, "&amp;");
- else if (*q == '"')
- url = g_string_append(url, "&quot;");
- else
- url = g_string_append_c(url, *q);
- q++;
- }
- p = q;
- } else {
- p++;
- }
- }
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "a";
- pt->dest_tag = "a";
- tags = g_list_prepend(tags, pt);
- if(xhtml)
- g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
- continue;
- }
-#define ESCAPE(from, to) \
- CHECK_QUOTE(from); \
- while (VALID_CHAR(from)) { \
- int len; \
- if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
- to = g_string_append(to, "&amp;"); \
- else if (*from == '\'') \
- to = g_string_append_c(to, '\"'); \
- else \
- to = g_string_append_c(to, *from); \
- from++; \
- }
- if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
- const char *p = c + 5;
- GString *style = g_string_new("");
- struct purple_parse_tag *pt;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "back=", 5)) {
- const char *q = p + 5;
- GString *color = g_string_new("");
- ESCAPE(q, color);
- g_string_append_printf(style, "background: %s; ", color->str);
- g_string_free(color, TRUE);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
- const char *q = p + 6;
- GString *color = g_string_new("");
- ESCAPE(q, color);
- g_string_append_printf(style, "color: %s; ", color->str);
- g_string_free(color, TRUE);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
- const char *q = p + 5;
- GString *face = g_string_new("");
- ESCAPE(q, face);
- g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
- g_string_free(face, TRUE);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
- const char *q = p + 5;
- int sz;
- const char *size = "medium";
- CHECK_QUOTE(q);
- sz = atoi(q);
- switch (sz)
- {
- case 1:
- size = "xx-small";
- break;
- case 2:
- size = "small";
- break;
- case 3:
- size = "medium";
- break;
- case 4:
- size = "large";
- break;
- case 5:
- size = "x-large";
- break;
- case 6:
- case 7:
- size = "xx-large";
- break;
- default:
- break;
- }
- g_string_append_printf(style, "font-size: %s; ", size);
- p = q;
- } else {
- p++;
- }
- }
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "font";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- if(style->len && xhtml)
- g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
- else
- pt->ignore = TRUE;
- g_string_free(style, TRUE);
- continue;
- }
-#undef ESCAPE
- if (!g_ascii_strncasecmp(c, "<body ", 6)) {
- const char *p = c + 6;
- gboolean did_something = FALSE;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
- const char *q = p + 8;
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- GString *color = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- color = g_string_append_c(color, *q);
- q++;
- }
- if (xhtml)
- g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
- g_string_free(color, TRUE);
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- pt->src_tag = "body";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- did_something = TRUE;
- break;
- }
- p++;
- }
- if (did_something) continue;
- }
- /* this has to come after the special case for bgcolor */
- ALLOW_TAG("body");
- if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
- char *p = strstr(c + strlen("<!--"), "-->");
- if(p) {
- if(xhtml)
- xhtml = g_string_append(xhtml, "<!--");
- c += strlen("<!--");
- continue;
- }
- }
-
- if(xhtml)
- xhtml = g_string_append(xhtml, "&lt;");
- if(plain)
- plain = g_string_append_c(plain, '<');
- c++;
- }
- } else if(*c == '&') {
- char buf[7];
- const char *pln;
- int len;
-
- if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
- len = 1;
- g_snprintf(buf, sizeof(buf), "%c", *c);
- pln = buf;
- }
- if(xhtml)
- xhtml = g_string_append_len(xhtml, c, len);
- if(plain)
- plain = g_string_append(plain, pln);
- if(cdata)
- cdata = g_string_append_len(cdata, c, len);
- c += len;
- } else {
- if(xhtml)
- xhtml = g_string_append_c(xhtml, *c);
- if(plain)
- plain = g_string_append_c(plain, *c);
- if(cdata)
- cdata = g_string_append_c(cdata, *c);
- c++;
- }
- }
- if(xhtml) {
- for (tag = tags; tag ; tag = tag->next) {
- struct purple_parse_tag *pt = tag->data;
- if(!pt->ignore)
- g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
- }
- }
- /* the list owns its purple_parse_tag records; free them with the list */
- g_list_free_full(tags, g_free);
- if(xhtml_out)
- *xhtml_out = g_string_free(xhtml, FALSE);
- if(plain_out)
- *plain_out = g_string_free(plain, FALSE);
- if(url)
- g_string_free(url, TRUE);
- if (cdata)
- g_string_free(cdata, TRUE);
-#undef CHECK_QUOTE
-#undef VALID_CHAR
-}
-
-/* Returns a newly allocated copy of `str` with HTML tags removed and the
- * entities known to purple_markup_unescape_entity() decoded, or NULL when
- * `str` is NULL. The copy is rewritten in place: `i` is the read index and
- * `j` the write index.
- *
- * Along the way: <br>, </table> and (except at the very start of the output)
- * <p>, <tr, <hr, <li and <div produce a newline; a <td that follows </td>
- * produces a tab; the contents of <script> and <style> are dropped; and the
- * href of an <a> is appended as " (href)" at its </a> when it differs from
- * the link text.
- *
- * The following are probably reasonable changes:
- * - \n should be converted to a normal space
- * - in addition to <br>, <p> and <div> etc. should also be converted into \n
- * - We want to turn </td>#whitespace<td> sequences into a single tab
- * - We want to turn </tr>#whitespace<tr> sequences into a single \n
- * - <script>...</script> and <style>...</style> should be completely removed
- */
-
-char *
-purple_markup_strip_html(const char *str)
-{
- int i, j, k, entlen;
- gboolean visible = TRUE;
- gboolean closing_td_p = FALSE;
- gchar *str2;
- const gchar *cdata_close_tag = NULL, *ent;
- gchar *href = NULL;
- int href_st = 0;
-
- if(!str)
- return NULL;
-
- str2 = g_strdup(str);
-
- for (i = 0, j = 0; str2[i]; i++)
- {
- if (str2[i] == '<')
- {
- if (cdata_close_tag)
- {
- /* Note: Don't even assume any other tag is a tag in CDATA */
- if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
- strlen(cdata_close_tag)) == 0)
- {
- i += strlen(cdata_close_tag) - 1;
- cdata_close_tag = NULL;
- }
- continue;
- }
- else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
- {
- str2[j++] = '\t';
- visible = TRUE;
- }
- else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
- {
- closing_td_p = TRUE;
- visible = FALSE;
- }
- else
- {
- closing_td_p = FALSE;
- visible = TRUE;
- }
-
- k = i + 1;
-
- if(g_ascii_isspace(str2[k]))
- visible = TRUE;
- else if (str2[k])
- {
- /* Scan until we end the tag either implicitly (closed start
- * tag) or explicitly, using a sloppy method (i.e., < or >
- * inside quoted attributes will screw us up)
- */
- while (str2[k] && str2[k] != '<' && str2[k] != '>')
- {
- k++;
- }
-
- /* If we've got an <a> tag with an href, save the address
- * to print later. */
- if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
- g_ascii_isspace(str2[i+2]))
- {
- int st; /* start of href, inclusive [ */
- int end; /* end of href, exclusive ) */
- char delim = ' ';
- /* Find start of href */
- for (st = i + 3; st < k; st++)
- {
- if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
- {
- st += 5;
- if (str2[st] == '"' || str2[st] == '\'')
- {
- delim = str2[st];
- st++;
- }
- break;
- }
- }
- /* find end of address */
- for (end = st; end < k && str2[end] != delim; end++)
- {
- /* All the work is done in the loop construct above. */
- }
-
- /* If there's an address, save it. If there was
- * already one saved, kill it. */
- if (st < k)
- {
- char *tmp;
- g_free(href);
- tmp = g_strndup(str2 + st, end - st);
- href = purple_unescape_html(tmp);
- g_free(tmp);
- href_st = j;
- }
- }
-
- /* Replace </a> with an ascii representation of the
- * address the link was pointing to. */
- else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
- {
- size_t hrlen = strlen(href);
-
- /* Only insert the href if it's different from the CDATA. */
- if ((hrlen != (gsize)(j - href_st) ||
- strncmp(str2 + href_st, href, hrlen)) &&
- (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
- strncmp(str2 + href_st, href + 7, hrlen - 7)))
- {
- str2[j++] = ' ';
- str2[j++] = '(';
- memmove(str2 + j, href, hrlen);
- j += hrlen;
- str2[j++] = ')';
- g_free(href);
- href = NULL;
- }
- }
-
- /* Check for tags which should be mapped to newline (but ignore some of
- * the tags at the beginning of the text) */
- else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
- || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
- {
- str2[j++] = '\n';
- }
- /* Check for tags which begin CDATA and need to be closed */
- else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
- {
- cdata_close_tag = "</script>";
- }
- else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
- {
- cdata_close_tag = "</style>";
- }
- /* Update the index and continue checking after the tag */
- i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
- continue;
- }
- }
- else if (cdata_close_tag)
- {
- continue;
- }
- else if (!g_ascii_isspace(str2[i]))
- {
- visible = TRUE;
- }
-
- if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
- {
- while (*ent)
- str2[j++] = *ent++;
- i += entlen - 1;
- continue;
- }
-
- if (visible)
- str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
- }
-
- g_free(href);
-
- str2[j] = '\0';
-
- return str2;
-}
-
-/*
- * Tells whether `c` is a character that cannot be part of a URL being
- * linkified: space, comma, NUL, newline, carriage return, '<', '>' or '"'.
- */
-static gboolean
-badchar(char c)
-{
-	if (c == '\0' || c == ' ' || c == ',')
-		return TRUE;
-	if (c == '\n' || c == '\r')
-		return TRUE;
-	if (c == '<' || c == '>' || c == '"')
-		return TRUE;
-
-	return FALSE;
-}
-
-/*
- * Tells whether the text at `c` begins with one of the entities &lt;, &gt;
- * or &quot; (compared case-insensitively).
- */
-static gboolean
-badentity(const char *c)
-{
-	static const char *const entities[] = { "&lt;", "&gt;", "&quot;" };
-	gsize idx;
-
-	for (idx = 0; idx < G_N_ELEMENTS(entities); idx++) {
-		if (g_ascii_strncasecmp(c, entities[idx], strlen(entities[idx])) == 0)
-			return TRUE;
-	}
-
-	return FALSE;
-}
-
-/*
- * Tries to turn the URL starting at `c` into an <A HREF> element appended to
- * `ret`.
- *
- * ret:          output buffer.
- * start:        beginning of the whole text, used as a lower bound when
- *               stepping back over trailing punctuation.
- * c:            position where the URL scheme/prefix was recognised.
- * matchlen:     length of that scheme/prefix; a URL consisting of nothing
- *               more is not linked.
- * urlprefix:    text placed in front of the URL inside HREF.
- * inside_paren: when positive, a ')' directly before the end is left out of
- *               the link.
- *
- * Returns the position just past the linked text, or `c` unchanged when no
- * link was produced.
- */
-static const char *
-process_link(GString *ret,
-		const char *start, const char *c,
-		int matchlen,
-		const char *urlprefix,
-		int inside_paren)
-{
-	const char *t = c;
-	char *shown, *target;
-
-	for (;;) {
-		/* run forward to the next character or entity that may end a URL */
-		while (!badchar(*t) && !badentity(t))
-			t++;
-
-		/* nothing followed the scheme/prefix: not a link */
-		if (t - c == matchlen)
-			return c;
-
-		/* a comma not followed by a space stays part of the URL */
-		if (*t == ',' && *(t + 1) != ' ') {
-			t++;
-			continue;
-		}
-
-		break;
-	}
-
-	/* leave a trailing '.' and, inside parentheses, a trailing ')' outside */
-	if (t > start && *(t - 1) == '.')
-		t--;
-	if (t > start && *(t - 1) == ')' && inside_paren > 0)
-		t--;
-
-	shown = g_strndup(c, t - c);
-	target = purple_unescape_html(shown);
-	g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
-			urlprefix,
-			target, shown);
-	g_free(target);
-	g_free(shown);
-
-	return t;
-}
-
-char *
-purple_markup_linkify(const char *text)
-{
- const char *c, *t, *q = NULL;
- char *tmpurlbuf, *url_buf;
- gunichar g;
- gboolean inside_html = FALSE;
- int inside_paren = 0;
- GString *ret;
-
- if (text == NULL)
- return NULL;
-
- ret = g_string_new("");
-
- c = text;
- while (*c) {
-
- if(*c == '(' && !inside_html) {
- inside_paren++;
- ret = g_string_append_c(ret, *c);
- c++;
- }
-
- if(inside_html) {
- if(*c == '>') {
- inside_html = FALSE;
- } else if(!q && (*c == '\"' || *c == '\'')) {
- q = c;
- } else if(q) {
- if(*c == *q)
- q = NULL;
- }
- } else if(*c == '<') {
- inside_html = TRUE;
- if (!g_ascii_strncasecmp(c, "<A", 2)) {
- while (1) {
- if (!g_ascii_strncasecmp(c, "/A>", 3)) {
- inside_html = FALSE;
- break;
- }
- ret = g_string_append_c(ret, *c);
- c++;
- if (!(*c))
- break;
- }
- }
- } else if (!g_ascii_strncasecmp(c, "http://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "https://", 8)) {
- c = process_link(ret, text, c, 8, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
- c = process_link(ret, text, c, 6, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "file://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 4, "http://", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 4, "ftp://", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 5, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
- t = c;
- while (1) {
- if (badchar(*t) || badentity(t)) {
- char *d;
- if (t - c == 7) {
- break;
- }
- if (t > text && *(t - 1) == '.')
- t--;
- if ((d = strstr(c + 7, "?")) != NULL && d < t)
- url_buf = g_strndup(c + 7, d - c - 7);
- else
- url_buf = g_strndup(c + 7, t - c - 7);
- if (!purple_email_is_valid(url_buf)) {
- g_free(url_buf);
- break;
- }
- g_free(url_buf);
- url_buf = g_strndup(c, t - c);
- tmpurlbuf = purple_unescape_html(url_buf);
- g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
- tmpurlbuf, url_buf);
- g_free(url_buf);
- g_free(tmpurlbuf);
- c = t;
- break;
- }
- t++;
- }
- } else if (c != text && (*c == '@')) {
- int flag;
- GString *gurl_buf = NULL;
- const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
-
- if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
- flag = 0;
- else {
- flag = 1;
- gurl_buf = g_string_new("");
- }
-
- t = c;
- while (flag) {
- /* iterate backwards grabbing the local part of an email address */
- g = g_utf8_get_char(t);
- if (badchar(*t) || (g >= 127) || (*t == '(') ||
- ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
- !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
- (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
- /* local part will already be part of ret, strip it out */
- ret = g_string_truncate(ret, ret->len - (c - t));
- ret = g_string_append_unichar(ret, g);
- break;
- } else {
- g_string_prepend_unichar(gurl_buf, g);
- t = g_utf8_find_prev_char(text, t);
- if (t < text) {
- ret = g_string_assign(ret, "");
- break;
- }
- }
- }
-
- t = g_utf8_find_next_char(c, NULL);
-
- while (flag) {
- /* iterate forwards grabbing the domain part of an email address */
- g = g_utf8_get_char(t);
- if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
- char *d;
-
- url_buf = g_string_free(gurl_buf, FALSE);
- gurl_buf = NULL;
-
- /* strip off trailing periods */
- if (*url_buf) {
- for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
- *d = '\0';
- }
-
- tmpurlbuf = purple_unescape_html(url_buf);
- if (purple_email_is_valid(tmpurlbuf)) {
- g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
- tmpurlbuf, url_buf);
- } else {
- g_string_append(ret, url_buf);
- }
- g_free(url_buf);
- g_free(tmpurlbuf);
- c = t;
-
- break;
- } else {
- g_string_append_unichar(gurl_buf, g);
- t = g_utf8_find_next_char(t, NULL);
- }
- }
-
- if (gurl_buf) {
- g_string_free(gurl_buf, TRUE);
- }
- }
-
- if(*c == ')' && !inside_html) {
- inside_paren--;
- ret = g_string_append_c(ret, *c);
- c++;
- }
-
- if (*c == 0)
- break;
-
- ret = g_string_append_c(ret, *c);
- c++;
-
- }
- return g_string_free(ret, FALSE);
-}
-
-/*
- * Builds a copy of @in in which entities are replaced by their expansion.
- *
- * At each position purple_markup_unescape_entity() is asked whether an
- * entity starts there.  When it returns a string, that string is appended
- * and the number of bytes it reported is skipped; otherwise the current
- * byte is copied through unchanged.
- *
- * Returns NULL when @in is NULL.  Otherwise returns the character data of
- * a GString released with g_string_free(..., FALSE), which the caller owns
- * and releases with g_free().
- */
-char *purple_unescape_text(const char *in)
-{
-	GString *out;
-	const char *cursor;
-
-	if (in == NULL)
-		return NULL;
-
-	out = g_string_new("");
-	cursor = in;
-
-	while (*cursor != '\0') {
-		int consumed;
-		const char *replacement = purple_markup_unescape_entity(cursor, &consumed);
-
-		if (replacement == NULL) {
-			/* Not an entity: keep the byte as it is. */
-			g_string_append_c(out, *cursor);
-			cursor++;
-			continue;
-		}
-
-		g_string_append(out, replacement);
-		cursor += consumed;
-	}
-
-	return g_string_free(out, FALSE);
-}
-
-/*
- * Builds a copy of @html with entities expanded and "<br>" turned into a
- * newline.
- *
- * At each position, in this order:
- *   - if purple_markup_unescape_entity() returns a string, that string is
- *     appended and the number of bytes it reported is skipped;
- *   - if the next four bytes are exactly "<br>" (the comparison is
- *     case-sensitive), a single '\n' is appended;
- *   - otherwise the current byte is copied through unchanged.
- *
- * Returns NULL when @html is NULL.  Otherwise returns the character data of
- * a GString released with g_string_free(..., FALSE), which the caller owns
- * and releases with g_free().
- */
-char *purple_unescape_html(const char *html)
-{
-	GString *out;
-	const char *pos;
-
-	if (html == NULL)
-		return NULL;
-
-	out = g_string_new("");
-
-	for (pos = html; *pos != '\0'; ) {
-		int ent_len;
-		const char *decoded = purple_markup_unescape_entity(pos, &ent_len);
-
-		if (decoded != NULL) {
-			g_string_append(out, decoded);
-			pos += ent_len;
-		} else if (strncmp(pos, "<br>", 4) == 0) {
-			g_string_append_c(out, '\n');
-			pos += 4;
-		} else {
-			g_string_append_c(out, *pos);
-			pos++;
-		}
-	}
-
-	return g_string_free(out, FALSE);
-}
-
-/* Frees every tag string still held in @open_tags, then the queue itself. */
-static void
-markup_slice_free_tags(GQueue *open_tags)
-{
-	char *tag;
-
-	while ((tag = g_queue_pop_head(open_tags)))
-		g_free(tag);
-	g_queue_free(open_tags);
-}
-
-/*
- * On its first call, writes the tags currently held in @open_tags into
- * @ret, oldest first, and sets *@started.  Later calls do nothing.
- */
-static void
-markup_slice_begin_output(GString *ret, GQueue *open_tags, gboolean *started)
-{
-	GList *l;
-
-	if (*started)
-		return;
-
-	/* Tags are pushed at the head, so the tail holds the oldest one. */
-	for (l = open_tags->tail; l != NULL; l = l->prev)
-		g_string_append(ret, l->data);
-
-	*started = TRUE;
-}
-
-/*
- * Returns the part of the marked-up string @str that lies between
- * positions @x (inclusive) and @y (exclusive).
- *
- * Positions are counted as follows:
- *   - a plain character counts as 1;
- *   - an entity ("&" up to the next ";") counts as 1;
- *   - a tag starting with "<img " counts as strlen("[Image]");
- *   - a tag starting with "<br" counts as 1;
- *   - "<hr>" counts as strlen("\n---\n");
- *   - every other tag counts as 0.
- *
- * Tags that are neither closing tags nor one of the counted tags above are
- * remembered while they are open.  Those opened before the slice begins are
- * written out once, immediately before the first piece of output, whatever
- * that piece is (character, entity or tag).  Any tag still open when the
- * slice ends is closed with "</name>".  If nothing was emitted at all (the
- * text ended before position @x), no closing tags are emitted either, so
- * the result never contains a closer without its opener.
- *
- * Returns NULL (via g_return_val_if_fail) when @str is NULL or @x > @y,
- * and NULL when a '<' is not followed by a '>' or a '&' is not followed by
- * a ';' anywhere in the rest of the string.  Returns an empty string when
- * @x == @y.  The returned string is newly allocated and owned by the
- * caller, who releases it with g_free().
- */
-char *
-purple_markup_slice(const char *str, guint x, guint y)
-{
-	GString *ret;
-	GQueue *q;
-	guint z = 0;
-	gboolean started = FALSE;
-	gunichar c;
-	char *tag;
-
-	g_return_val_if_fail(str != NULL, NULL);
-	g_return_val_if_fail(x <= y, NULL);
-
-	if (x == y)
-		return g_strdup("");
-
-	ret = g_string_new("");
-	q = g_queue_new();
-
-	while (*str && (z < y)) {
-		c = g_utf8_get_char(str);
-
-		if (c == '<') {
-			const char *end = strchr(str, '>');
-			gboolean closing = FALSE;
-			gboolean opening = FALSE;
-
-			if (!end) {
-				g_string_free(ret, TRUE);
-				markup_slice_free_tags(q);
-				return NULL;
-			}
-
-			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
-				z += strlen("[Image]");
-			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
-				z += 1;
-			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
-				z += strlen("\n---\n");
-			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
-				closing = TRUE;
-			} else {
-				opening = TRUE;
-			}
-
-			/*
-			 * Write the pending tags before this tag touches the
-			 * stack: an opening tag emitted here must not be written
-			 * a second time from the stack, and a closing tag emitted
-			 * here needs its opener in front of it.
-			 */
-			if (z >= x)
-				markup_slice_begin_output(ret, q, &started);
-
-			if (closing) {
-				/* pop stack; popping an empty stack yields NULL */
-				g_free(g_queue_pop_head(q));
-			} else if (opening) {
-				/* push it onto the stack */
-				g_queue_push_head(q, g_strndup(str, end - str + 1));
-			}
-
-			if (z >= x)
-				g_string_append_len(ret, str, end - str + 1);
-
-			str = end;
-		} else if (c == '&') {
-			const char *end = strchr(str, ';');
-
-			if (!end) {
-				g_string_free(ret, TRUE);
-				markup_slice_free_tags(q);
-				return NULL;
-			}
-
-			if (z >= x) {
-				markup_slice_begin_output(ret, q, &started);
-				g_string_append_len(ret, str, end - str + 1);
-			}
-
-			z++;
-			str = end;
-		} else {
-			if (z >= x) {
-				markup_slice_begin_output(ret, q, &started);
-				g_string_append_unichar(ret, c);
-			}
-			z++;
-		}
-
-		/* str sits on the last byte of what was consumed; step past it. */
-		str = g_utf8_next_char(str);
-	}
-
-	while ((tag = g_queue_pop_head(q))) {
-		/* Only close tags whose opener actually made it into ret. */
-		if (started) {
-			char *name;
-
-			name = purple_markup_get_tag_name(tag);
-			g_string_append_printf(ret, "</%s>", name);
-			g_free(name);
-		}
-		g_free(tag);
-	}
-
-	g_queue_free(q);
-	return g_string_free(ret, FALSE);
-}
-
-/*
- * Extracts the name from a tag string such as "<font size=2>".
- *
- * The name is everything after the leading '<' up to, but not including,
- * the first '>', ' ' or '/' (or the end of the string).  For a tag whose
- * second character is already one of those, the result is an empty string.
- *
- * Returns NULL (via g_return_val_if_fail) when @tag is NULL or does not
- * start with '<'.  Otherwise returns a newly allocated copy made with
- * g_strndup(), which the caller releases with g_free().
- */
-char *
-purple_markup_get_tag_name(const char *tag)
-{
-	const char *name_start;
-	const char *scan;
-
-	g_return_val_if_fail(tag != NULL, NULL);
-	g_return_val_if_fail(*tag == '<', NULL);
-
-	name_start = tag + 1;
-	scan = name_start;
-
-	while (*scan != '\0' && *scan != '>' && *scan != ' ' && *scan != '/')
-		scan++;
-
-	return g_strndup(name_start, scan - name_start);
-}
-
-/**************************************************************************
* Path/Filename Functions
**************************************************************************/
const char *