summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author xhe <xw897002528@gmail.com> 2017-04-02 18:07:16 +0800
committer xhe <xw897002528@gmail.com> 2017-05-06 13:03:23 +0800
commit c086a03eaeb095312788e949afa1d22921121764 (patch)
tree 6cd0f9e04d021c8d17fbc545a13caf9094a9bd2e
parent b49e68b34d5fb5ee328bca43d98655ec4c7f25e6 (diff)
download gettext-tiny-c086a03eaeb095312788e949afa1d22921121764.tar.gz
add support for plurals:
An entry whose msgstr is empty is invalid. dinvalid() drops such an entry from the buffers and counters, and writes down any plural translations that are still buffered.
-rw-r--r-- src/msgfmt.c 168
-rw-r--r-- src/poparser.c 27
-rw-r--r-- src/poparser.h 2
3 files changed, 160 insertions, 37 deletions
diff --git a/src/msgfmt.c b/src/msgfmt.c
index 2020533..6a346ba 100644
--- a/src/msgfmt.c
+++ b/src/msgfmt.c
@@ -100,6 +100,15 @@ enum sysdep_types {
st_priumax,
st_max
};
+
+static char msgidbuf[1024];
+static unsigned idlen[3] = { 0 };
+static char msgstr1[4096];
+static unsigned mslen1=0;
+static char msgstr2[4096];
+static unsigned mslen2=0;
+static unsigned msc=0;
+
static const char sysdep_str[][10]={
[st_priu32] = "\x08<PRIu32>",
[st_priu64] = "\x08<PRIu64>",
@@ -184,9 +193,50 @@ static void error(const char* msg) {
exit(1);
}
+static inline void write_plurals(struct callbackdata *d) { // flush the two staged plural buffers (msgstr1, msgstr2) into d's msgstr buffer and translation table
+ d->translist[d->curr[pe_msgstr]].len=mslen1-1; // mslen1 counts every copied NUL; the stored length drops the final one. NOTE(review): mslen1 is unsigned, so this wraps if mslen1 is 0 - confirm callers only flush after msgstr1 was filled
+ d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; // first form starts at the current write offset
+ d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; // link the string entry to its translation record
+
+ memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], msgstr1, mslen1); // copy the staged first buffer, terminators included
+ d->stroff[pe_msgstr]+=mslen1;
+ d->curr[pe_msgstr]++; // move on to the next translation slot
+
+ d->translist[d->curr[pe_msgstr]].len=mslen2-1; // same bookkeeping for the second staged buffer; NOTE(review): wraps likewise if mslen2 is 0
+ d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
+ d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
+
+ memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], msgstr2, mslen2); // copy the staged second buffer, terminators included
+ d->stroff[pe_msgstr]+=mslen2;
+ d->curr[pe_msgstr]++;
+
+ mslen1=mslen2=msc=0; // reset the staging state so the next entry starts clean
+}
+
+static inline void dinvalid(struct callbackdata *d, struct po_info *info) { // undo the previous msgid/msgstr group if its translation is empty, then flush any staged plural forms
+ // the previous msgid/msgstr group is invalid when its recorded translation length is 0
+ if(d->curr[pe_msgid] > 1 && d->translist[d->curr[pe_msgstr]-1].len == 0) {
+ // shrink the accumulated buffer lengths by the dropped strings (the +1 presumably covers each string's NUL)
+ d->len[pe_msgid] -= d->strlist[d->curr[pe_msgstr]-1].str.len + 1; // NOTE(review): strlist is indexed with the msgstr cursor here, not the msgid cursor - confirm this is intended
+ d->len[pe_msgstr] -= d->translist[d->curr[pe_msgstr]-1].len + 1;
+ // rewind the write offsets to where the dropped strings started
+ d->stroff[pe_msgid] = d->strlist[d->curr[pe_msgstr]-1].str.off;
+ d->stroff[pe_msgstr] = d->translist[d->curr[pe_msgstr]-1].off;
+
+ // remove the invalid entry from the counters and cursors
+ d->num[pe_msgid]--;
+ d->curr[pe_msgid]--;
+ d->num[pe_msgstr] -= d->translist[d->curr[pe_msgstr]-1].len + 1; // len is 0 in this branch, so this subtracts 1
+ d->curr[pe_msgstr] -= d->translist[d->curr[pe_msgstr]-1].len + 1; // likewise steps the msgstr cursor back by 1
+ }
+ // flush staged plural forms when msc is non-zero and does not exceed info->nplurals
+ if(msc && msc <= info->nplurals)
+ write_plurals(d);
+}
+
int process_line_callback(struct po_info* info, void* user) {
struct callbackdata *d = (struct callbackdata *) user;
- assert(info->type == pe_msgid || info->type == pe_msgstr);
+ assert(info->type == pe_msgid || info->type == pe_msgstr || info->type == pe_plural);
char **sysdeps;
unsigned len, count, i, l;
switch(d->pass) {
@@ -197,22 +247,72 @@ int process_line_callback(struct po_info* info, void* user) {
break;
case pass_second:
sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0);
- if(info->type == pe_msgstr) {
- // a mismatch of one is allowed, as there may be msgid followed by msgid_plural
- if((unsigned)(d->curr[pe_msgid] - (d->curr[pe_msgstr] + count)) > 1)
- error("count of msgid/msgstr mismatch\n");
+ // we may met %<PRIu32> in plurals
+ unsigned pc = d->curr[pe_msgid] - d->curr[pe_msgstr];
+ if(pc == 2 && info->type == pe_plural) {
+ // extract len and off infomation
+ // bakup the content of msgid
+ for(unsigned a=pc;a>0;a--) {
+ idlen[a-1] = d->strlist[d->curr[pe_msgid]-a].str.len + 1;
+ memcpy(&msgidbuf[idlen[a]], d->strbuffer[pe_msgid] + d->strlist[d->curr[pe_msgid]-a].str.off, idlen[a-1]);
+
+ }
+ // rollback the offset
+ d->stroff[pe_msgid]= d->strlist[d->curr[pe_msgid]-pc].str.off;
}
for(i=0;i<count;i++) {
l = strlen(sysdeps[i]);
- memcpy(d->strbuffer[info->type] + d->stroff[info->type], sysdeps[i], l+1);
- if(info->type == pe_msgid)
- d->strlist[d->curr[info->type]].str = (struct strtbl){.len=l, .off=d->stroff[info->type]};
- else {
- d->translist[d->curr[info->type]] = (struct strtbl){.len=l, .off=d->stroff[info->type]};
- d->strlist[d->curr[info->type]].trans = &d->translist[d->curr[info->type]];
+ if(info->type == pe_msgid) {
+ // check for previous group of msgid/str
+ dinvalid(d, info);
+ d->strlist[d->curr[pe_msgid]].str.len=l;
+ d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
+ d->curr[pe_msgid]++;
+ memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], sysdeps[i], l+1);
+ d->stroff[pe_msgid]+=l+1;
+ } else if(info->type == pe_plural) {
+ if(pc != 2) {
+ // should not count on msg_idplural
+ memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], sysdeps[i], l+1);
+ d->strlist[d->curr[pe_msgid]-1].str.len+=l+1;
+ d->stroff[pe_msgid]+=l+1;
+ } else {
+ d->strlist[d->curr[pe_msgid]-(count-i)].str.len = idlen[(count-i)-1]-1 + l+1;
+ d->strlist[d->curr[pe_msgid]-(count-i)].str.off = d->stroff[pe_msgid];
+
+ memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], &msgidbuf[idlen[(count-i)]], idlen[(count-i)-1]);
+ d->stroff[pe_msgid]+=idlen[(count-i)-1];
+
+ memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], sysdeps[i], l+1);
+ d->stroff[pe_msgid]+=l+1;
+ }
+ } else {
+ if(pc == 1) {
+ // all one-shot
+ d->translist[d->curr[pe_msgstr]].len=l;
+ d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
+ d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
+ d->curr[pe_msgstr]++;
+ memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], sysdeps[i], l+1);
+ d->stroff[pe_msgstr]+=l+1;
+ } else if(pc == 0 ) {
+ d->translist[d->curr[pe_msgstr]-1].len+=l+1;
+ memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], sysdeps[i], l+1);
+ d->stroff[pe_msgstr]+=l+1;
+ } else {
+ if(i==0) {
+ memcpy(&msgstr1[mslen1], sysdeps[i], l+1);
+ mslen1 += l+1;
+ } else {
+ memcpy(&msgstr2[mslen2], sysdeps[i], l+1);
+ mslen2 += l+1;
+ msc++;
+ }
+ // time to write down
+ if(msc == info->nplurals)
+ write_plurals(d);
+ }
}
- d->curr[info->type]++;
- d->stroff[info->type]+=l+1;
}
free(sysdeps);
break;
@@ -231,10 +331,12 @@ int process(FILE *in, FILE *out) {
.num = {
[pe_msgid] = 0,
[pe_msgstr] = 0,
+ [pe_plural] = 0,
},
.len = {
[pe_msgid] = 0,
[pe_msgstr] = 0,
+ [pe_plural] = 0,
},
.off = 0,
.out = out,
@@ -242,62 +344,64 @@ int process(FILE *in, FILE *out) {
};
struct po_parser pb, *p = &pb;
- int invalid_file = 0;
mohdr.off_tbl_trans = mohdr.off_tbl_org;
for(d.pass = pass_first; d.pass <= pass_second; d.pass++) {
if(d.pass == pass_second) {
// start of second pass:
// check that data gathered in first pass is consistent
- if(d.num[pe_msgid] != d.num[pe_msgstr]) {
+ if((d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)) != d.num[pe_msgstr]) {
// one should actually abort here,
// but gnu gettext simply writes an empty .mo and returns success.
//abort();
fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n");
d.num[pe_msgid] = 0;
- invalid_file = 1;
+ return 0;
}
- // calculate header fields from len and num arrays
- mohdr.numstring = d.num[pe_msgid];
- mohdr.off_tbl_org = sizeof(struct mo_hdr);
- mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2);
- // print header
- fwrite(&mohdr, sizeof(mohdr), 1, out);
- // set offset startvalue
- d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2);
- if(invalid_file) return 0;
-
d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1);
- d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1);
- d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid], 1);
+ d.translist = calloc(d.num[pe_msgid] * sizeof(struct strtbl), 1);
+ d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural], 1);
d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1);
d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0;
assert(d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]);
}
+
poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d);
while((lp = fgets(line, sizeof(line), in))) {
poparser_feed_line(p, lp, sizeof(line));
}
-
poparser_finish(p);
+ // check for previous group of msgid/str
+ dinvalid(&d, &p->info);
+ if(d.pass == pass_second) {
+ // calculate header fields from len and num arrays
+ mohdr.numstring = d.num[pe_msgid];
+ mohdr.off_tbl_org = sizeof(struct mo_hdr);
+ mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2);
+ // set offset startvalue
+ d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2);
+ }
fseek(in, 0, SEEK_SET);
}
cb_for_qsort = &d;
qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp);
unsigned i;
+
+ // print header
+ fwrite(&mohdr, sizeof(mohdr), 1, out);
for(i = 0; i < d.num[pe_msgid]; i++) {
d.strlist[i].str.off += d.off;
fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out);
}
- for(i = 0; i < d.num[pe_msgstr]; i++) {
- d.strlist[i].trans->off += d.off + d.len[0];
+ for(i = 0; i < d.num[pe_msgid]; i++) {
+ d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural];
fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out);
}
- fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid], 1, d.out);
+ fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural], 1, d.out);
fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out);
return 0;
diff --git a/src/poparser.c b/src/poparser.c
index ecb623a..5efa5d8 100644
--- a/src/poparser.c
+++ b/src/poparser.c
@@ -20,10 +20,12 @@ static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* en
return pe_invalid;
}
if((y = strstarts(lp, "msg"))) {
- if((x = strstarts(y, "id")) && (isspace(*x) || ((x = strstarts(x, "_plural")) && isspace(*x))))
+ if((x = strstarts(y, "id")) && isspace(*x))
result_type = pe_msgid;
+ else if ((x = strstarts(y, "id_plural")) && isspace(*x))
+ result_type = pe_plural;
else if ((x = strstarts(y, "str")) && (isspace(*x) ||
- (x[0] == '[' && (x[1] == '0' || x[1] == '1') && x[2] == ']' && (x += 3) && isspace(*x))))
+ (x[0] == '[' && (x[1]-0x30) < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x))))
result_type = pe_msgstr;
else
goto inv;
@@ -43,6 +45,8 @@ static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* en
}
}
}
+ if(x = strstr(lp, "nplurals="))
+ info->nplurals = *(x+9) - 0x30;
result_type = pe_str;
x = lp;
goto conv;
@@ -87,6 +91,8 @@ void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_
p->curr_len = 0;
p->cbdata = cbdata;
*(p->info.charset) = 0;
+ // nplurals = 2 by default
+ p->info.nplurals = 50;
}
enum lineactions {
@@ -108,24 +114,35 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
[pe_str] = {
[pe_str] = la_abort,
[pe_msgid] = la_abort,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_abort,
[pe_invalid] = la_abort,
},
[pe_msgid] = {
[pe_str] = la_incr,
- [pe_msgid] = la_proc,
+ [pe_msgid] = la_abort,
+ [pe_plural] = la_proc,
+ [pe_msgstr] = la_proc,
+ [pe_invalid] = la_proc,
+ },
+ [pe_plural] = {
+ [pe_str] = la_incr,
+ [pe_msgid] = la_abort,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_proc,
[pe_invalid] = la_proc,
},
[pe_msgstr] = {
[pe_str] = la_incr,
[pe_msgid] = la_proc,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_proc,
[pe_invalid] = la_proc,
},
[pe_invalid] = {
[pe_str] = la_nop, // this can happen when we have msgstr[2] "" ... "foo", since we only parse msgstr[0] and [1]
[pe_msgid] = la_incr,
+ [pe_plural] = la_abort,
[pe_msgstr] = la_incr,
[pe_invalid] = la_nop,
},
@@ -136,11 +153,11 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
type = get_type_and_start(&p->info, line, line + buflen, &strstart);
switch(action_tbl[p->prev_type][type]) {
case la_incr:
- assert(type == pe_msgid || type == pe_msgstr || type == pe_str);
+ assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural);
p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
break;
case la_proc:
- assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr);
+ assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural);
p->info.text = convbuf;
p->info.textlen = p->curr_len;
p->info.type = p->prev_type;
diff --git a/src/poparser.h b/src/poparser.h
index 25e3af5..0e2515b 100644
--- a/src/poparser.h
+++ b/src/poparser.h
@@ -4,6 +4,7 @@
enum po_entry {
pe_msgid = 0,
+ pe_plural,
pe_msgstr,
pe_maxstr,
pe_str = pe_maxstr,
@@ -15,6 +16,7 @@ struct po_info {
enum po_entry type;
char *text;
char charset[12];
+ unsigned int nplurals;
size_t textlen;
};