diff options
Diffstat (limited to 'src/poparser.c')
-rw-r--r-- | src/poparser.c | 91 |
1 files changed, 80 insertions, 11 deletions
diff --git a/src/poparser.c b/src/poparser.c index 2494342..2d4bcef 100644 --- a/src/poparser.c +++ b/src/poparser.c @@ -2,27 +2,38 @@ #include <assert.h> #include <stdlib.h> #include <string.h> +#include <iconv.h> #include "poparser.h" #include "StringEscape.h" #define streq(A, B) (!strcmp(A, B)) #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) -static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart) { +static unsigned fuzzymark = 0; +static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) { enum po_entry result_type; char *x, *y; size_t start = (size_t) lp; while(isspace(*lp) && lp < end) lp++; if(lp[0] == '#') { + char *s; + if(s = strstr(lp, "fuzzy")) { + if(fuzzymark != 0) fuzzymark++; + else fuzzymark=2; + } inv: *stringstart = 0; return pe_invalid; } if((y = strstarts(lp, "msg"))) { - if((x = strstarts(y, "id")) && (isspace(*x) || ((x = strstarts(x, "_plural")) && isspace(*x)))) + if((x = strstarts(y, "id")) && isspace(*x)) result_type = pe_msgid; + else if ((x = strstarts(y, "id_plural")) && isspace(*x)) + result_type = pe_plural; + else if ((x = strstarts(y, "ctxt")) && isspace(*x)) + result_type = pe_ctxt; else if ((x = strstarts(y, "str")) && (isspace(*x) || - (x[0] == '[' && (x[1] == '0' || x[1] == '1') && x[2] == ']' && (x += 3) && isspace(*x)))) + (x[0] == '[' && (x[1]-0x30) < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x)))) result_type = pe_msgstr; else goto inv; @@ -31,6 +42,19 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart conv: *stringstart = ((size_t) x - start) + 1; } else if(*lp == '"') { + if(!(*info->charset)) { + if(x = strstr(lp, "charset=")) { + // charset=xxx\\n + int len = strlen(x+=8) - 4; + assert(len <= 11); + if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) { + memcpy(info->charset, x, len); + info->charset[len] = 0; + } + } + } + if(x = strstr(lp, "nplurals=")) + info->nplurals = *(x+9) - 0x30; result_type = pe_str; x = lp; goto conv; @@ -42,7 +66,7 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart /* expects a pointer to the first char after a opening " in a string, * converts the string into convbuf, and returns the length of that string */ -static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) { +static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) { size_t result = 0; char* e = x + strlen(x); assert(e > x && e < end && *e == 0); @@ -50,7 +74,19 @@ static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t c while(isspace(*e)) e--; if(*e != '"') abort(); *e = 0; - result = unescape(x, convbuf, convbuflen); + char *s; + if(*info->charset) { + iconv_t ret = iconv_open("UTF-8", info->charset); + if(ret != (iconv_t)-1) { + size_t a=end-x, b=a*4; + char mid[b], *midp=mid; + iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b); + if(s = strstr(mid, "charset=")) + memcpy(s+8, "UTF-8\\n\0", 8); + result = unescape(mid, convbuf, convbuflen); + // iconv doesnt recognize the encoding + } else result = unescape(x, convbuf, convbuflen); + } else result = unescape(x, convbuf, convbuflen); return result; } @@ -62,6 +98,10 @@ void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_ p->prev_type = pe_invalid; p->curr_len = 0; p->cbdata = cbdata; + *(p->info.charset) = 0; + // nplurals = 2 by default + p->info.nplurals = 50; + fuzzymark = 0; } enum lineactions { @@ -83,24 +123,48 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { [pe_str] = { [pe_str] = la_abort, [pe_msgid] = la_abort, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, [pe_msgstr] = la_abort, [pe_invalid] = la_abort, }, [pe_msgid] = { [pe_str] = la_incr, + [pe_msgid] = la_abort, + [pe_ctxt] = la_abort, + [pe_plural] = la_proc, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_ctxt] = { + [pe_str] = la_incr, [pe_msgid] = la_proc, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, + [pe_msgstr] = la_abort, + [pe_invalid] = la_proc, + }, + [pe_plural] = { + [pe_str] = la_incr, + [pe_msgid] = la_abort, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_msgstr] = { [pe_str] = la_incr, [pe_msgid] = la_proc, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_invalid] = { - [pe_str] = la_nop, // this can happen when we have msgstr[2] "" ... "foo", since we only parse msgstr[0] and [1] + [pe_str] = la_abort, [pe_msgid] = la_incr, + [pe_ctxt] = la_incr, + [pe_plural] = la_incr, [pe_msgstr] = la_incr, [pe_invalid] = la_nop, }, @@ -108,20 +172,25 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { enum po_entry type; - type = get_type_and_start(line, line + buflen, &strstart); + type = get_type_and_start(&p->info, line, line + buflen, &strstart); + if(fuzzymark) { + if(type == pe_ctxt && fuzzymark == 1) fuzzymark--; + if(type == pe_msgid) fuzzymark--; + if(fuzzymark > 0) return 0; + } switch(action_tbl[p->prev_type][type]) { case la_incr: - assert(type == pe_msgid || type == pe_msgstr || type == pe_str); - p->curr_len += get_length_and_convert(line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); + assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt); + p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); break; case la_proc: - assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr); + assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt); p->info.text = convbuf; p->info.textlen = p->curr_len; p->info.type = p->prev_type; p->cb(&p->info, p->cbdata); if(type != pe_invalid) - p->curr_len = get_length_and_convert(line + strstart, line + buflen, convbuf, convbuflen); + p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen); else p->curr_len = 0; break; |