summary | refs | log | tree | commit | diff
path: root/src/poparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/poparser.c')
-rw-r--r-- src/poparser.c 91
1 files changed, 80 insertions, 11 deletions
diff --git a/src/poparser.c b/src/poparser.c
index 2494342..2d4bcef 100644
--- a/src/poparser.c
+++ b/src/poparser.c
@@ -2,27 +2,38 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
+#include <iconv.h>
#include "poparser.h"
#include "StringEscape.h"
#define streq(A, B) (!strcmp(A, B))
#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1)))
-static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart) {
+static unsigned fuzzymark = 0;
+static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) {
enum po_entry result_type;
char *x, *y;
size_t start = (size_t) lp;
while(isspace(*lp) && lp < end) lp++;
if(lp[0] == '#') {
+ char *s;
+ if(s = strstr(lp, "fuzzy")) {
+ if(fuzzymark != 0) fuzzymark++;
+ else fuzzymark=2;
+ }
inv:
*stringstart = 0;
return pe_invalid;
}
if((y = strstarts(lp, "msg"))) {
- if((x = strstarts(y, "id")) && (isspace(*x) || ((x = strstarts(x, "_plural")) && isspace(*x))))
+ if((x = strstarts(y, "id")) && isspace(*x))
result_type = pe_msgid;
+ else if ((x = strstarts(y, "id_plural")) && isspace(*x))
+ result_type = pe_plural;
+ else if ((x = strstarts(y, "ctxt")) && isspace(*x))
+ result_type = pe_ctxt;
else if ((x = strstarts(y, "str")) && (isspace(*x) ||
- (x[0] == '[' && (x[1] == '0' || x[1] == '1') && x[2] == ']' && (x += 3) && isspace(*x))))
+ (x[0] == '[' && (x[1]-0x30) < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x))))
result_type = pe_msgstr;
else
goto inv;
@@ -31,6 +42,19 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart
conv:
*stringstart = ((size_t) x - start) + 1;
} else if(*lp == '"') {
+ if(!(*info->charset)) {
+ if(x = strstr(lp, "charset=")) {
+ // charset=xxx\\n
+ int len = strlen(x+=8) - 4;
+ assert(len <= 11);
+ if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) {
+ memcpy(info->charset, x, len);
+ info->charset[len] = 0;
+ }
+ }
+ }
+ if(x = strstr(lp, "nplurals="))
+ info->nplurals = *(x+9) - 0x30;
result_type = pe_str;
x = lp;
goto conv;
@@ -42,7 +66,7 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart
/* expects a pointer to the first char after a opening " in a string,
* converts the string into convbuf, and returns the length of that string */
-static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
+static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) {
size_t result = 0;
char* e = x + strlen(x);
assert(e > x && e < end && *e == 0);
@@ -50,7 +74,19 @@ static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t c
while(isspace(*e)) e--;
if(*e != '"') abort();
*e = 0;
- result = unescape(x, convbuf, convbuflen);
+ char *s;
+ if(*info->charset) {
+ iconv_t ret = iconv_open("UTF-8", info->charset);
+ if(ret != (iconv_t)-1) {
+ size_t a=end-x, b=a*4;
+ char mid[b], *midp=mid;
+ iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b);
+ if(s = strstr(mid, "charset="))
+ memcpy(s+8, "UTF-8\\n\0", 8);
+ result = unescape(mid, convbuf, convbuflen);
+ // iconv doesnt recognize the encoding
+ } else result = unescape(x, convbuf, convbuflen);
+ } else result = unescape(x, convbuf, convbuflen);
return result;
}
@@ -62,6 +98,10 @@ void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_
p->prev_type = pe_invalid;
p->curr_len = 0;
p->cbdata = cbdata;
+ *(p->info.charset) = 0;
+ // nplurals = 2 by default
+ p->info.nplurals = 50;
+ fuzzymark = 0;
}
enum lineactions {
@@ -83,24 +123,48 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
[pe_str] = {
[pe_str] = la_abort,
[pe_msgid] = la_abort,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_abort,
[pe_invalid] = la_abort,
},
[pe_msgid] = {
[pe_str] = la_incr,
+ [pe_msgid] = la_abort,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_proc,
+ [pe_msgstr] = la_proc,
+ [pe_invalid] = la_proc,
+ },
+ [pe_ctxt] = {
+ [pe_str] = la_incr,
[pe_msgid] = la_proc,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
+ [pe_msgstr] = la_abort,
+ [pe_invalid] = la_proc,
+ },
+ [pe_plural] = {
+ [pe_str] = la_incr,
+ [pe_msgid] = la_abort,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_proc,
[pe_invalid] = la_proc,
},
[pe_msgstr] = {
[pe_str] = la_incr,
[pe_msgid] = la_proc,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_proc,
[pe_invalid] = la_proc,
},
[pe_invalid] = {
- [pe_str] = la_nop, // this can happen when we have msgstr[2] "" ... "foo", since we only parse msgstr[0] and [1]
+ [pe_str] = la_abort,
[pe_msgid] = la_incr,
+ [pe_ctxt] = la_incr,
+ [pe_plural] = la_incr,
[pe_msgstr] = la_incr,
[pe_invalid] = la_nop,
},
@@ -108,20 +172,25 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
enum po_entry type;
- type = get_type_and_start(line, line + buflen, &strstart);
+ type = get_type_and_start(&p->info, line, line + buflen, &strstart);
+ if(fuzzymark) {
+ if(type == pe_ctxt && fuzzymark == 1) fuzzymark--;
+ if(type == pe_msgid) fuzzymark--;
+ if(fuzzymark > 0) return 0;
+ }
switch(action_tbl[p->prev_type][type]) {
case la_incr:
- assert(type == pe_msgid || type == pe_msgstr || type == pe_str);
- p->curr_len += get_length_and_convert(line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
+ assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt);
+ p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
break;
case la_proc:
- assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr);
+ assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt);
p->info.text = convbuf;
p->info.textlen = p->curr_len;
p->info.type = p->prev_type;
p->cb(&p->info, p->cbdata);
if(type != pe_invalid)
- p->curr_len = get_length_and_convert(line + strstart, line + buflen, convbuf, convbuflen);
+ p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen);
else
p->curr_len = 0;
break;