diff options
author | xhe <xw897002528@gmail.com> | 2017-04-02 12:31:31 +0800 |
---|---|---|
committer | xhe <xw897002528@gmail.com> | 2017-05-01 12:27:38 +0800 |
commit | 916f59f2a3361bbc1fc5a1ba1c1f76006de490fd (patch) | |
tree | e1bdb7cd277ad58010d2f6c731eed93eadccd01a | |
parent | 09c11ed0f65885bd491cee2041a939b26f28bdd1 (diff) | |
download | gettext-tiny-916f59f2a3361bbc1fc5a1ba1c1f76006de490fd.tar.gz |
support converting to UTF-8
-rw-r--r-- | src/poparser.c | 37 | ||||
-rw-r--r-- | src/poparser.h | 1 |
2 files changed, 32 insertions, 6 deletions
diff --git a/src/poparser.c b/src/poparser.c index 2494342..ecb623a 100644 --- a/src/poparser.c +++ b/src/poparser.c @@ -2,13 +2,14 @@ #include <assert.h> #include <stdlib.h> #include <string.h> +#include <iconv.h> #include "poparser.h" #include "StringEscape.h" #define streq(A, B) (!strcmp(A, B)) #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) -static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart) { +static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) { enum po_entry result_type; char *x, *y; size_t start = (size_t) lp; @@ -31,6 +32,17 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart conv: *stringstart = ((size_t) x - start) + 1; } else if(*lp == '"') { + if(!(*info->charset)) { + if(x = strstr(lp, "charset=")) { + // charset=xxx\\n + int len = strlen(x+=8) - 4; + assert(len <= 11); + if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) { + memcpy(info->charset, x, len); + info->charset[len] = 0; + } + } + } result_type = pe_str; x = lp; goto conv; @@ -42,7 +54,7 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart /* expects a pointer to the first char after a opening " in a string, * converts the string into convbuf, and returns the length of that string */ -static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) { +static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) { size_t result = 0; char* e = x + strlen(x); assert(e > x && e < end && *e == 0); @@ -50,7 +62,19 @@ static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t c while(isspace(*e)) e--; if(*e != '"') abort(); *e = 0; - result = unescape(x, convbuf, convbuflen); + char *s; + if(*info->charset) { + iconv_t ret = iconv_open("UTF-8", info->charset); + if(ret != (iconv_t)-1) { + size_t a=end-x, b=a*4; + char mid[b], *midp=mid; + iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b); + if(s = strstr(mid, "charset=")) + memcpy(s+8, "UTF-8\\n\0", 8); + result = unescape(mid, convbuf, convbuflen); + // iconv doesnt recognize the encoding + } else result = unescape(x, convbuf, convbuflen); + } else result = unescape(x, convbuf, convbuflen); return result; } @@ -62,6 +86,7 @@ void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_ p->prev_type = pe_invalid; p->curr_len = 0; p->cbdata = cbdata; + *(p->info.charset) = 0; } enum lineactions { @@ -108,11 +133,11 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { enum po_entry type; - type = get_type_and_start(line, line + buflen, &strstart); + type = get_type_and_start(&p->info, line, line + buflen, &strstart); switch(action_tbl[p->prev_type][type]) { case la_incr: assert(type == pe_msgid || type == pe_msgstr || type == pe_str); - p->curr_len += get_length_and_convert(line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); + p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); break; case la_proc: assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr); @@ -121,7 +146,7 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { p->info.type = p->prev_type; p->cb(&p->info, p->cbdata); if(type != pe_invalid) - p->curr_len = get_length_and_convert(line + strstart, line + buflen, convbuf, convbuflen); + p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen); else p->curr_len = 0; break; diff --git a/src/poparser.h b/src/poparser.h index e29594f..25e3af5 100644 --- a/src/poparser.h +++ b/src/poparser.h @@ -14,6 +14,7 @@ enum po_entry { struct po_info { enum po_entry type; char *text; + char charset[12]; size_t textlen; }; |