From c086a03eaeb095312788e949afa1d22921121764 Mon Sep 17 00:00:00 2001 From: xhe Date: Sun, 2 Apr 2017 18:07:16 +0800 Subject: add support for plurals: when msgstr is empty, it's invalid. dinvalid() will delete the invalid or write them down. --- src/msgfmt.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++----------- src/poparser.c | 27 ++++++++-- src/poparser.h | 2 + 3 files changed, 160 insertions(+), 37 deletions(-) diff --git a/src/msgfmt.c b/src/msgfmt.c index 2020533..6a346ba 100644 --- a/src/msgfmt.c +++ b/src/msgfmt.c @@ -100,6 +100,15 @@ enum sysdep_types { st_priumax, st_max }; + +static char msgidbuf[1024]; +static unsigned idlen[3] = { 0 }; +static char msgstr1[4096]; +static unsigned mslen1=0; +static char msgstr2[4096]; +static unsigned mslen2=0; +static unsigned msc=0; + static const char sysdep_str[][10]={ [st_priu32] = "\x08", [st_priu64] = "\x08", @@ -184,9 +193,50 @@ static void error(const char* msg) { exit(1); } +static inline void write_plurals(struct callbackdata *d) { + d->translist[d->curr[pe_msgstr]].len=mslen1-1; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], msgstr1, mslen1); + d->stroff[pe_msgstr]+=mslen1; + d->curr[pe_msgstr]++; + + d->translist[d->curr[pe_msgstr]].len=mslen2-1; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], msgstr2, mslen2); + d->stroff[pe_msgstr]+=mslen2; + d->curr[pe_msgstr]++; + + mslen1=mslen2=msc=0; +} + +static inline void dinvalid(struct callbackdata *d, struct po_info *info) { + // previous group of msgid/str is invalid + if(d->curr[pe_msgid] > 1 && d->translist[d->curr[pe_msgstr]-1].len == 0) { + // set back the buffer len + d->len[pe_msgid] -= d->strlist[d->curr[pe_msgstr]-1].str.len + 1; + d->len[pe_msgstr] -= d->translist[d->curr[pe_msgstr]-1].len + 1; + // set back the offset + d->stroff[pe_msgid] = d->strlist[d->curr[pe_msgstr]-1].str.off; + d->stroff[pe_msgstr] = d->translist[d->curr[pe_msgstr]-1].off; + + // kick the invalid out of counts + d->num[pe_msgid]--; + d->curr[pe_msgid]--; + d->num[pe_msgstr] -= d->translist[d->curr[pe_msgstr]-1].len + 1; + d->curr[pe_msgstr] -= d->translist[d->curr[pe_msgstr]-1].len + 1; + } + // plural <= nplurals is allowed + if(msc && msc <= info->nplurals) + write_plurals(d); +} + int process_line_callback(struct po_info* info, void* user) { struct callbackdata *d = (struct callbackdata *) user; - assert(info->type == pe_msgid || info->type == pe_msgstr); + assert(info->type == pe_msgid || info->type == pe_msgstr || info->type == pe_plural); char **sysdeps; unsigned len, count, i, l; switch(d->pass) { @@ -197,22 +247,72 @@ int process_line_callback(struct po_info* info, void* user) { break; case pass_second: sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0); - if(info->type == pe_msgstr) { - // a mismatch of one is allowed, as there may be msgid followed by msgid_plural - if((unsigned)(d->curr[pe_msgid] - (d->curr[pe_msgstr] + count)) > 1) - error("count of msgid/msgstr mismatch\n"); + // we may met % in plurals + unsigned pc = d->curr[pe_msgid] - d->curr[pe_msgstr]; + if(pc == 2 && info->type == pe_plural) { + // extract len and off infomation + // bakup the content of msgid + for(unsigned a=pc;a>0;a--) { + idlen[a-1] = d->strlist[d->curr[pe_msgid]-a].str.len + 1; + memcpy(&msgidbuf[idlen[a]], d->strbuffer[pe_msgid] + d->strlist[d->curr[pe_msgid]-a].str.off, idlen[a-1]); + + } + // rollback the offset + d->stroff[pe_msgid]= d->strlist[d->curr[pe_msgid]-pc].str.off; } for(i=0;istrbuffer[info->type] + d->stroff[info->type], sysdeps[i], l+1); - if(info->type == pe_msgid) - d->strlist[d->curr[info->type]].str = (struct strtbl){.len=l, .off=d->stroff[info->type]}; - else { - d->translist[d->curr[info->type]] = (struct strtbl){.len=l, .off=d->stroff[info->type]}; - d->strlist[d->curr[info->type]].trans = &d->translist[d->curr[info->type]]; + if(info->type == pe_msgid) { + // check for previous group of msgid/str + dinvalid(d, info); + d->strlist[d->curr[pe_msgid]].str.len=l; + d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; + d->curr[pe_msgid]++; + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], sysdeps[i], l+1); + d->stroff[pe_msgid]+=l+1; + } else if(info->type == pe_plural) { + if(pc != 2) { + // should not count on msg_idplural + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], sysdeps[i], l+1); + d->strlist[d->curr[pe_msgid]-1].str.len+=l+1; + d->stroff[pe_msgid]+=l+1; + } else { + d->strlist[d->curr[pe_msgid]-(count-i)].str.len = idlen[(count-i)-1]-1 + l+1; + d->strlist[d->curr[pe_msgid]-(count-i)].str.off = d->stroff[pe_msgid]; + + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], &msgidbuf[idlen[(count-i)]], idlen[(count-i)-1]); + d->stroff[pe_msgid]+=idlen[(count-i)-1]; + + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], sysdeps[i], l+1); + d->stroff[pe_msgid]+=l+1; + } + } else { + if(pc == 1) { + // all one-shot + d->translist[d->curr[pe_msgstr]].len=l; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + d->curr[pe_msgstr]++; + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], sysdeps[i], l+1); + d->stroff[pe_msgstr]+=l+1; + } else if(pc == 0 ) { + d->translist[d->curr[pe_msgstr]-1].len+=l+1; + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], sysdeps[i], l+1); + d->stroff[pe_msgstr]+=l+1; + } else { + if(i==0) { + memcpy(&msgstr1[mslen1], sysdeps[i], l+1); + mslen1 += l+1; + } else { + memcpy(&msgstr2[mslen2], sysdeps[i], l+1); + mslen2 += l+1; + msc++; + } + // time to write down + if(msc == info->nplurals) + write_plurals(d); + } } - d->curr[info->type]++; - d->stroff[info->type]+=l+1; } free(sysdeps); break; @@ -231,10 +331,12 @@ int process(FILE *in, FILE *out) { .num = { [pe_msgid] = 0, [pe_msgstr] = 0, + [pe_plural] = 0, }, .len = { [pe_msgid] = 0, [pe_msgstr] = 0, + [pe_plural] = 0, }, .off = 0, .out = out, @@ -242,62 +344,64 @@ int process(FILE *in, FILE *out) { }; struct po_parser pb, *p = &pb; - int invalid_file = 0; mohdr.off_tbl_trans = mohdr.off_tbl_org; for(d.pass = pass_first; d.pass <= pass_second; d.pass++) { if(d.pass == pass_second) { // start of second pass: // check that data gathered in first pass is consistent - if(d.num[pe_msgid] != d.num[pe_msgstr]) { + if((d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)) != d.num[pe_msgstr]) { // one should actually abort here, // but gnu gettext simply writes an empty .mo and returns success. //abort(); fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n"); d.num[pe_msgid] = 0; - invalid_file = 1; + return 0; } - // calculate header fields from len and num arrays - mohdr.numstring = d.num[pe_msgid]; - mohdr.off_tbl_org = sizeof(struct mo_hdr); - mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); - // print header - fwrite(&mohdr, sizeof(mohdr), 1, out); - // set offset startvalue - d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2); - if(invalid_file) return 0; - d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1); - d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1); - d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid], 1); + d.translist = calloc(d.num[pe_msgid] * sizeof(struct strtbl), 1); + d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural], 1); d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1); d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0; assert(d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]); } + poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); while((lp = fgets(line, sizeof(line), in))) { poparser_feed_line(p, lp, sizeof(line)); } - poparser_finish(p); + // check for previous group of msgid/str + dinvalid(&d, &p->info); + if(d.pass == pass_second) { + // calculate header fields from len and num arrays + mohdr.numstring = d.num[pe_msgid]; + mohdr.off_tbl_org = sizeof(struct mo_hdr); + mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); + // set offset startvalue + d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2); + } fseek(in, 0, SEEK_SET); } cb_for_qsort = &d; qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp); unsigned i; + + // print header + fwrite(&mohdr, sizeof(mohdr), 1, out); for(i = 0; i < d.num[pe_msgid]; i++) { d.strlist[i].str.off += d.off; fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out); } - for(i = 0; i < d.num[pe_msgstr]; i++) { - d.strlist[i].trans->off += d.off + d.len[0]; + for(i = 0; i < d.num[pe_msgid]; i++) { + d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural]; fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out); } - fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid], 1, d.out); + fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural], 1, d.out); fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out); return 0; diff --git a/src/poparser.c b/src/poparser.c index ecb623a..5efa5d8 100644 --- a/src/poparser.c +++ b/src/poparser.c @@ -20,10 +20,12 @@ static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* en return pe_invalid; } if((y = strstarts(lp, "msg"))) { - if((x = strstarts(y, "id")) && (isspace(*x) || ((x = strstarts(x, "_plural")) && isspace(*x)))) + if((x = strstarts(y, "id")) && isspace(*x)) result_type = pe_msgid; + else if ((x = strstarts(y, "id_plural")) && isspace(*x)) + result_type = pe_plural; else if ((x = strstarts(y, "str")) && (isspace(*x) || - (x[0] == '[' && (x[1] == '0' || x[1] == '1') && x[2] == ']' && (x += 3) && isspace(*x)))) + (x[0] == '[' && (x[1]-0x30) < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x)))) result_type = pe_msgstr; else goto inv; @@ -43,6 +45,8 @@ static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* en } } } + if(x = strstr(lp, "nplurals=")) + info->nplurals = *(x+9) - 0x30; result_type = pe_str; x = lp; goto conv; @@ -87,6 +91,8 @@ void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_ p->curr_len = 0; p->cbdata = cbdata; *(p->info.charset) = 0; + // nplurals = 2 by default + p->info.nplurals = 50; } enum lineactions { @@ -108,24 +114,35 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { [pe_str] = { [pe_str] = la_abort, [pe_msgid] = la_abort, + [pe_plural] = la_abort, [pe_msgstr] = la_abort, [pe_invalid] = la_abort, }, [pe_msgid] = { [pe_str] = la_incr, - [pe_msgid] = la_proc, + [pe_msgid] = la_abort, + [pe_plural] = la_proc, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_plural] = { + [pe_str] = la_incr, + [pe_msgid] = la_abort, + [pe_plural] = la_abort, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_msgstr] = { [pe_str] = la_incr, [pe_msgid] = la_proc, + [pe_plural] = la_abort, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_invalid] = { [pe_str] = la_nop, // this can happen when we have msgstr[2] "" ... "foo", since we only parse msgstr[0] and [1] [pe_msgid] = la_incr, + [pe_plural] = la_abort, [pe_msgstr] = la_incr, [pe_invalid] = la_nop, }, @@ -136,11 +153,11 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { type = get_type_and_start(&p->info, line, line + buflen, &strstart); switch(action_tbl[p->prev_type][type]) { case la_incr: - assert(type == pe_msgid || type == pe_msgstr || type == pe_str); + assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural); p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); break; case la_proc: - assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr); + assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural); p->info.text = convbuf; p->info.textlen = p->curr_len; p->info.type = p->prev_type; diff --git a/src/poparser.h b/src/poparser.h index 25e3af5..0e2515b 100644 --- a/src/poparser.h +++ b/src/poparser.h @@ -4,6 +4,7 @@ enum po_entry { pe_msgid = 0, + pe_plural, pe_msgstr, pe_maxstr, pe_str = pe_maxstr, @@ -15,6 +16,7 @@ struct po_info { enum po_entry type; char *text; char charset[12]; + unsigned int nplurals; size_t textlen; }; -- cgit v1.2.1