diff options
-rw-r--r-- | src/msgfmt.c | 566 | ||||
-rw-r--r-- | src/msgmerge.c | 91 | ||||
-rw-r--r-- | src/poparser.c | 541 | ||||
-rw-r--r-- | src/poparser.h | 100 |
4 files changed, 669 insertions, 629 deletions
diff --git a/src/msgfmt.c b/src/msgfmt.c index c5d43d7..98a9b88 100644 --- a/src/msgfmt.c +++ b/src/msgfmt.c @@ -1,28 +1,21 @@ /* msgfmt utility (C) 2012 rofl0r * released under the MIT license, see LICENSE for details */ +#define _BSD_SOURCE #include <stdio.h> #include <stdlib.h> +#include <stdint.h> #include <string.h> #include <ctype.h> +#include <limits.h> #include <assert.h> #include "poparser.h" -// in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr. -// TODO: maybe make it write "" instead of echoing the msgid. -//#define DO_NOTHING - -__attribute__((noreturn)) static void syntax(void) { - fprintf(stdout, - "Usage: msgfmt [OPTION] filename.po ...\n"); - exit(1); + fprintf(stdout, "Usage: msgfmt [OPTION] filename.po ...\n"); } -__attribute__((noreturn)) static void version(void) { - fprintf(stdout, - "msgfmt (GNU gettext-tools compatible) 99.9999.9999\n"); - exit(0); + fprintf(stdout, "msgfmt (GNU gettext-tools compatible) 99.9999.9999\n"); } #define streq(A, B) (!strcmp(A, B)) @@ -56,314 +49,114 @@ const struct mo_hdr def_hdr = { 0, }; - -// pass 0: collect numbers of strings, calculate size and offsets for tables -// print header -// pass 1: create in-memory string tables -enum passes { - pass_first = 0, - pass_collect_sizes = pass_first, - pass_second, - pass_max, -}; - struct strtbl { - unsigned len, off; + uint32_t len, off; }; struct strmap { - struct strtbl str, *trans; + struct strtbl str; + struct strtbl trans; }; struct callbackdata { - enum passes pass; - unsigned off; FILE* out; - unsigned msgidbuf1_len; - unsigned msgidbuf2_len; - unsigned pluralbuf1_len; - unsigned pluralbuf2_len; - unsigned ctxtbuf_len; - unsigned msgstr1_len; - unsigned msgstr2_len; - unsigned pluralstr_count; - unsigned string_maxlen; - char* msgidbuf1; - char* msgidbuf2; - char* pluralbuf1; - char* pluralbuf2; - char* msgctxtbuf; - char* msgstrbuf1; - char* msgstrbuf2; - unsigned priv_type; - unsigned priv_len; - unsigned num[pe_maxstr]; - unsigned len[pe_maxstr]; - struct strmap *strlist; - struct strtbl *translist; - char *strbuffer[pe_maxstr]; - unsigned stroff[pe_maxstr]; - unsigned curr[pe_maxstr]; + enum po_stage stage; + size_t cnt; + size_t len[2]; + char* buf[2]; + struct strmap *list; }; static struct callbackdata *cb_for_qsort; -int strmap_comp(const void *a_, const void *b_) { +int strtbl_cmp(const void *a_, const void *b_) { const struct strmap *a = a_, *b = b_; - return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off); + return strcmp(cb_for_qsort->buf[0] + a->str.off, cb_for_qsort->buf[0] + b->str.off); } -enum sysdep_types { - st_priu32 = 0, - st_priu64, - st_priumax, - st_max -}; +int process_line_callback(po_message_t msg, void* user) { + struct callbackdata *d = (struct callbackdata *) user; + struct strtbl *str, *trans; + size_t m; + int cnt[st_max] = {0}; + int i, k; -static const char sysdep_str[][10]={ - [st_priu32] = "\x08<PRIu32>", - [st_priu64] = "\x08<PRIu64>", - [st_priumax] = "\x09<PRIuMAX>", -}; -static const char sysdep_repl[][8]={ - [st_priu32] = "\x02lu\0u", - [st_priu64] = "\x02lu\0llu", - [st_priumax] = "\x01ju" -}; -static const char *get_repl(enum sysdep_types type, unsigned nr) { - assert(nr < (unsigned)sysdep_repl[type][0]); - const char* p = sysdep_repl[type]+1; - while(nr--) p+=strlen(p)+1; - return p; -} -static void replace(char* text, unsigned textlen, const char* what, const char * with) { - char*p = text; - size_t la = strlen(what), li=strlen(with); - assert(la >= li); - for(p=text;textlen >= la;) { - if(!memcmp(p,what,la)) { - memcpy(p, with, li); - textlen -= la; - memmove(p+li,p+la,textlen+1); - p+=li; - } else { - p++; - textlen--; - } - } -} -static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) { - unsigned i,divisor = 1; - for(i=type+1;i<st_max;i++) if(occurences[i]) divisor *= sysdep_repl[i][0]; - return (no/divisor)%sysdep_repl[type][0]; -} -static char** sysdep_transform(const char* text, unsigned textlen, unsigned *len, unsigned *count, int simulate) { - unsigned occurences[st_max] = {0}; - const char *p=text,*o; - unsigned i,j, l = textlen; - while(l && (o=strchr(p, '<'))) { - l-=o-p;p=o; - unsigned f = 0; - for(i=0;i<st_max;i++) - if(l>=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) { - occurences[i]++; - f=1; - p+=sysdep_str[i][0]; - l-=sysdep_str[i][0]; - break; - } - if(!f) p++,l--; - } - *count = 1; - for(i=0;i<st_max;i++) if(occurences[i]) *count *= sysdep_repl[i][0]; - l = textlen * *count; - for(i=0;i<*count;i++) for(j=0;j<st_max;j++) - if(occurences[j]) l-= occurences[j] * (sysdep_str[j][0] - strlen(get_repl(j, get_form(j, i, occurences)))); - *len = l+*count-1; - - char **out = 0; - if(!simulate) { - out = malloc((sizeof(char*)+textlen+1) * *count); - assert(out); - char *p = (void*)(out+*count); - for(i=0;i<*count;i++) { - out[i]=p; - memcpy(p, text, textlen+1); - p+=textlen+1; - } - for(i=0;i<*count;i++) for(j=0;j<st_max;j++) - if(occurences[j]) - replace(out[i], textlen, sysdep_str[j]+1, get_repl(j, get_form(j, i, occurences))); - } + if (msg->flags & PO_FUZZY) return 0; + if (msg->strlen[0] == 0) return 0; - return out; -} + switch(d->stage) { + case ps_size: + k = 1; + for (i=0; i < st_max; i++) + if (msg->sysdep[i]) + k *= msg->sysdep[i]; -static void error(const char* msg) { - fprintf(stderr, msg); - exit(1); -} + d->len[0] += (msg->id_len + 1)*k; -static inline void writemsg(struct callbackdata *d) { - if(d->msgidbuf1_len != 0) { - if(!d->strlist[d->curr[pe_msgid]].str.off) - d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; + if (msg->plural_len) + d->len[0] += (msg->plural_len + 1)*k; - if(d->ctxtbuf_len != 0) { - memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); - d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; - d->stroff[pe_msgid]+=d->ctxtbuf_len; - } - memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len); - d->stroff[pe_msgid]+=d->msgidbuf1_len; - d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1; - if(d->pluralbuf1_len != 0) { - memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len); - d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len; - d->stroff[pe_msgid]+=d->pluralbuf1_len; - } - d->curr[pe_msgid]++; - } - if(d->msgidbuf2_len != 0) { - if(!d->strlist[d->curr[pe_msgid]].str.off) - d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; - - if(d->ctxtbuf_len != 0) { - memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); - d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; - d->stroff[pe_msgid]+=d->ctxtbuf_len; - } - memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len); - d->stroff[pe_msgid]+=d->msgidbuf2_len; - d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1; - if(d->pluralbuf2_len != 0) { - memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len); - d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len; - d->stroff[pe_msgid]+=d->pluralbuf2_len; - } - d->curr[pe_msgid]++; - } + if (msg->ctxt_len) + d->len[0] += (msg->ctxt_len + 1)*k; - d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0; -} + for (i=0; msg->strlen[i]; i++) + d->len[1] += (msg->strlen[i] + 1)*k; -static inline void writestr(struct callbackdata *d, struct po_info *info) { - // msgid xx; msgstr ""; is widely happened, it's invalid - - // https://github.com/sabotage-linux/gettext-tiny/issues/1 - // no invalid, when empty, check d->num[pe_msgid] - if(!d->pluralstr_count && d->num[pe_msgid] > 0) { - d->len[pe_msgid]-=d->msgidbuf1_len; - d->len[pe_msgid]-=d->msgidbuf2_len; - d->len[pe_plural]-=d->pluralbuf1_len; - d->len[pe_plural]-=d->pluralbuf2_len; - d->len[pe_ctxt]-=d->ctxtbuf_len; - d->len[pe_msgstr]--; - d->num[pe_msgid]--; - d->num[pe_msgstr]--; - d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; - return; - } + d->cnt += k; + break; + case ps_parse: + for (k=1; k; d->cnt++) { + k = 0; - if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) { - writemsg(d); - // plural <= nplurals is allowed - d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1; - d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; - d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; - - memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len); - d->stroff[pe_msgstr]+=d->msgstr1_len; - d->curr[pe_msgstr]++; - - if(d->msgstr2_len) { - d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1; - d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; - d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; - - memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len); - d->stroff[pe_msgstr]+=d->msgstr2_len; - d->curr[pe_msgstr]++; - } + str = &d->list[d->cnt].str; + trans = &d->list[d->cnt].trans; - d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; - } -} + str->off = d->len[0]; + str->len = 0; -int process_line_callback(struct po_info* info, void* user) { - struct callbackdata *d = (struct callbackdata *) user; - assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural); - char **sysdeps; - unsigned len, count, i, l; - switch(d->pass) { - case pass_collect_sizes: - sysdep_transform(info->text, info->textlen, &len, &count, 1); - d->num[info->type] += count; - if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) { - // ctxt meets msgid with sysdeps, multiply num and len to suit it - d->len[pe_ctxt] += d->priv_len +1; - d->num[pe_ctxt]++; + if (msg->ctxt_len) { + m = poparser_sysdep(msg->ctxt, &d->buf[0][d->len[0]], cnt); + str->len += m; + d->buf[0][d->len[0]+m-1] = 0x4; + d->len[0] += m; + } + + m = poparser_sysdep(msg->id, &d->buf[0][d->len[0]], cnt); + str->len += m; + d->len[0] += m; + + if (msg->plural_len) { + m = poparser_sysdep(msg->plural, &d->buf[0][d->len[0]], cnt); + str->len += m; + d->len[0] += m; } - if(count != 1 && info->type == pe_ctxt) { - // except msgid, str, plural, all other types should not have sysdeps - abort(); + + trans->off = d->len[1]; + trans->len = 0; + for (i=0; msg->strlen[i]; i++) { + m = poparser_sysdep(msg->str[i], &d->buf[1][d->len[1]], cnt); + trans->len += m; + d->len[1] += m; } - d->priv_type = info->type; - d->priv_len = len; - d->len[info->type] += len +1; - - if(len+1 > d->string_maxlen) - d->string_maxlen = len+1; - break; - case pass_second: - sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0); - for(i=0;i<count;i++) { - l = strlen(sysdeps[i]); - assert(l+1 <= d->string_maxlen); - if(info->type == pe_msgid) { - if(i==0 && d->msgidbuf1_len) - writestr(d, info); - - // just copy, it's written down when writemsg() - if(i==0) { - memcpy(d->msgidbuf1, sysdeps[i], l+1); - d->msgidbuf1_len = l+1; - } else { - memcpy(d->msgidbuf2, sysdeps[i], l+1); - d->msgidbuf2_len = l+1; - } - } else if(info->type == pe_plural) { - if(i==0) { - memcpy(d->pluralbuf1, sysdeps[i], l+1); - d->pluralbuf1_len = l+1; - } else { - memcpy(d->pluralbuf2, sysdeps[i], l+1); - d->pluralbuf2_len = l+1; - } - } else if(info->type == pe_ctxt) { - writestr(d, info); - d->ctxtbuf_len = l+1; - memcpy(d->msgctxtbuf, sysdeps[i], l); - d->msgctxtbuf[l] = 0x4;//EOT - } else { - // just copy, it's written down when writestr() - if(l) { - if(i==0) { - memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1); - d->msgstr1_len += l+1; - d->pluralstr_count++; - } else { - // sysdeps exist - memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1); - d->msgstr2_len += l+1; - } + for (i=0; i < st_max; i++) { + if (cnt[i] < msg->sysdep[i]) { + cnt[i]++; + + // we have a carry + if (cnt[i] == msg->sysdep[i]) { + cnt[i] = 0; + continue; } + + k = 1; + break; } } - free(sysdeps); - break; - default: - abort(); + } + + break; + default: + abort(); } return 0; } @@ -371,113 +164,88 @@ int process_line_callback(struct po_info* info, void* user) { int process(FILE *in, FILE *out) { struct mo_hdr mohdr = def_hdr; char line[8192]; char *lp; + size_t off, i; + enum po_error t; char convbuf[32768]; struct callbackdata d = { - .num = { - [pe_msgid] = 0, - [pe_msgstr] = 0, - [pe_plural] = 0, - [pe_ctxt] = 0, - }, - .len = { - [pe_msgid] = 0, - [pe_msgstr] = 0, - [pe_plural] = 0, - [pe_ctxt] = 0, - }, - .off = 0, + .len = {0, 0}, + .cnt = 0, .out = out, - .pass = pass_first, - .ctxtbuf_len = 0, - .pluralbuf1_len = 0, - .pluralbuf2_len = 0, - .msgidbuf1_len = 0, - .msgidbuf2_len = 0, - .msgstr1_len = 0, - .msgstr2_len = 0, - .pluralstr_count = 0, - .string_maxlen = 0, }; struct po_parser pb, *p = &pb; mohdr.off_tbl_trans = mohdr.off_tbl_org; - for(d.pass = pass_first; d.pass <= pass_second; d.pass++) { - if(d.pass == pass_second) { - // start of second pass: - // ensure we dont output when there's no strings at all - if(d.num[pe_msgid] == 0) { - return 1; - } - - // check that data gathered in first pass is consistent - if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) { - // one should actually abort here, - // but gnu gettext simply writes an empty .mo and returns success. - //abort(); - fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n"); - d.num[pe_msgid] = 0; - return 0; - } - d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1); - d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen; - d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen; - d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen; - d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen; - d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen; - d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals; - - d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1); - d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1); - d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1); - d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1); - d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0; - assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]); - } - - poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); + poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); + d.stage = p->stage; - while((lp = fgets(line, sizeof(line), in))) { - poparser_feed_line(p, lp, sizeof(line)); - } - poparser_finish(p); - if(d.pass == pass_second) - writestr(&d, &p->info); - - if(d.pass == pass_second) { - // calculate header fields from len and num arrays - mohdr.numstring = d.num[pe_msgid]; - mohdr.off_tbl_org = sizeof(struct mo_hdr); - mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); - // set offset startvalue - d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2); + while((lp = fgets(line, sizeof(line), in))) { + if ((t = poparser_feed_line(p, lp, strlen(line))) != po_success) + return t; + } + if ((t = poparser_finish(p)) != po_success) + return t; + + if (d.cnt == 0) return -1; + + d.list = (struct strmap*)malloc(sizeof(struct strmap)*d.cnt); + d.buf[0] = (char*)malloc(d.len[0]); + d.buf[1] = (char*)malloc(d.len[1]); + d.len[0] = 0; + d.len[1] = 0; + d.cnt = 0; + d.stage = p->stage; + + fseek(in, 0, SEEK_SET); + while ((lp = fgets(line, sizeof(line), in))) { + if ((t = poparser_feed_line(p, lp, strlen(line))) != po_success) { + free(d.list); + free(d.buf[0]); + free(d.buf[1]); + return t; } - fseek(in, 0, SEEK_SET); + } + if ((t = poparser_finish(p)) != po_success) { + free(d.list); + free(d.buf[0]); + free(d.buf[1]); + return t; } cb_for_qsort = &d; - qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp); - unsigned i; + qsort(d.list, d.cnt, sizeof(struct strmap), strtbl_cmp); + cb_for_qsort = NULL; // print header + mohdr.numstring = d.cnt; + mohdr.off_tbl_org = sizeof(struct mo_hdr); + mohdr.off_tbl_trans = mohdr.off_tbl_org + d.cnt * sizeof(struct strtbl); fwrite(&mohdr, sizeof(mohdr), 1, out); - for(i = 0; i < d.num[pe_msgid]; i++) { - d.strlist[i].str.off += d.off; - fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out); + + off = mohdr.off_tbl_trans + d.cnt * sizeof(struct strtbl); + for (i = 0; i < d.cnt; i++) { + d.list[i].str.off += off; + fwrite(&d.list[i].str, sizeof(struct strtbl), 1, d.out); } - for(i = 0; i < d.num[pe_msgid]; i++) { - d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt]; - fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out); + + off += d.len[0]; + for (i = 0; i < d.cnt; i++) { + d.list[i].trans.off += off; + fwrite(&d.list[i].trans, sizeof(struct strtbl), 1, d.out); } - fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out); - fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out); + + fwrite(d.buf[0], d.len[0], 1, d.out); + fwrite(d.buf[1], d.len[1], 1, d.out); + + free(d.list); + free(d.buf[0]); + free(d.buf[1]); return 0; } - void set_file(int out, char* fn, FILE** dest) { if(streq(fn, "-")) { if(out) { @@ -505,11 +273,16 @@ void set_file(int out, char* fn, FILE** dest) { } int main(int argc, char**argv) { - if(argc == 1) syntax(); + if (argc == 1) { + syntax(); + return 0; + } + int arg = 1; FILE *out = NULL; FILE *in = NULL; int expect_in_fn = 1; + char path[PATH_MAX]; char* locale = NULL; char* dest = NULL; #define A argv[arg] @@ -539,19 +312,20 @@ int main(int argc, char**argv) { streq(A+2, "statistics") || strstarts(A+2, "check-accelerators=") || strstarts(A+2, "resource=") - ) { - } else if((dest = strstarts(A+2, "locale="))) { - locale = dest; - } else if((dest = strstarts(A+2, "output-file="))) { - set_file(1, dest, &out); - } else if(streq(A+2, "version")) { - version(); - } else if(streq(A+2, "help")) { - syntax(); - } else if (expect_in_fn) { - set_file(0, A, &in); - expect_in_fn = 0; - } + ) { + } else if((locale = strstarts(A+2, "locale="))) { + } else if((dest = strstarts(A+2, "output-file="))) { + set_file(1, dest, &out); + } else if(streq(A+2, "version")) { + version(); + return 0; + } else if(streq(A+2, "help")) { + syntax(); + return 0; + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } } else if(streq(A + 1, "o")) { arg++; dest = A; @@ -568,8 +342,10 @@ int main(int argc, char**argv) { ) { } else if (streq(A+1, "V")) { version(); + return 0; } else if (streq(A+1, "h")) { syntax(); + return 0; } else if (streq(A+1, "l")) { arg++; locale = A; @@ -587,14 +363,14 @@ int main(int argc, char**argv) { } if (locale != NULL && dest != NULL) { - int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale); - char msg[sz+1]; - snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale); - FILE *fp = fopen(msg, "w"); + snprintf(path, sizeof(path), "%s/%s.msg", dest, locale); + FILE *fp = fopen(path, "w"); if (fp) { fclose(fp); return 0; - } else return 1; + } + + return -1; } if(out == NULL) { @@ -603,14 +379,16 @@ int main(int argc, char**argv) { } if(in == NULL || out == NULL) { - return 1; + return -1; } + int ret = process(in, out); fflush(in); fflush(out); + if(in != stdin) fclose(in); if(out != stdout) fclose(out); - if (ret == 1) { + if (ret < 0) { return remove(dest); } return ret; diff --git a/src/msgmerge.c b/src/msgmerge.c index 53bc90a..fe6dfdb 100644 --- a/src/msgmerge.c +++ b/src/msgmerge.c @@ -36,37 +36,58 @@ struct fiLes { FILE *po; FILE *pot; FILE *compend; - int plural_count; - enum po_entry prev_type; + + // the biggest length of a string + enum po_stage stage; + size_t len; + char *buf; }; /* currently we only output input strings as output strings * i.e. there is no translation lookup at all */ -int process_line_callback(struct po_info* info, void* user) { - char convbuf[16384]; +int process_line_callback(po_message_t msg, void* user) { struct fiLes* file = (struct fiLes*) user; + int i; + switch (file->stage) { + case ps_size: + if (msg->ctxt_len > file->len) + file->len = msg->ctxt_len + 1; + + if (msg->id_len > file->len) + file->len = msg->id_len + 1; + + if (msg->plural_len > file->len) + file->len = msg->plural_len + 1; + + for (i=0; i < MAX_NPLURALS; i++) + if (msg->strlen[i] > file->len) + file->len = msg->strlen[i] + 1; - // escape what is unescaped automatically by lib - escape(info->text, convbuf, sizeof(convbuf)); - switch (info->type) { - case pe_msgid: - file->plural_count = 1; - fprintf(file->out, "\nmsgid \"%s\"\n", convbuf); - file->prev_type = info->type; - break; - case pe_ctxt: - fprintf(file->out, "msgctxt \"%s\"\n", convbuf); - break; - case pe_plural: - fprintf(file->out, "msgid_plural \"%s\"\n", convbuf); - file->prev_type = info->type; break; - case pe_msgstr: - if (file->prev_type == pe_plural) { - fprintf(file->out, "msgstr[%d] \"%s\"\n", file->plural_count++, convbuf); + case ps_parse: + if (msg->ctxt_len) { + escape(msg->ctxt, file->buf, file->len); + fprintf(file->out, "msgctxt \"%s\"\n", file->buf); + } + + escape(msg->id, file->buf, file->len); + fprintf(file->out, "msgid \"%s\"\n", file->buf); + + if (msg->plural_len) { + escape(msg->plural, file->buf, file->len); + fprintf(file->out, "msgid_plural \"%s\"\n", file->buf); + } + + if (msg->plural_len) { + for (i=0; i < MAX_NPLURALS && msg->strlen[i]; i++) { + escape(msg->str[i], file->buf, file->len); + fprintf(file->out, "msgstr[%d] \"%s\"\n", i, file->buf); + } } else { - fprintf(file->out, "msgstr \"%s\"\n", convbuf); + escape(msg->str[0], file->buf, file->len); + fprintf(file->out, "msgstr \"%s\"\n", file->buf); } + break; } return 0; @@ -74,13 +95,35 @@ int process_line_callback(struct po_info* info, void* user) { int process(struct fiLes *files, int update, int backup) { (void) update; (void) backup; + enum po_error t; struct po_parser pb, *p = &pb; char line[4096], conv[8192], *lb; + + files->stage = ps_size; poparser_init(p, conv, sizeof(conv), process_line_callback, files); while((lb = fgets(line, sizeof(line), files->po))) { - poparser_feed_line(p, lb, sizeof(line)); + if ((t = poparser_feed_line(p, lb, strlen(line))) != po_success) + return t; + } + if ((t = poparser_finish(p)) != po_success) + free(files->buf); + + files->stage = ps_parse; + files->buf = (char*)malloc(files->len); + fseek(files->po, 0, SEEK_SET); + + while((lb = fgets(line, sizeof(line), files->po))) { + if ((t = poparser_feed_line(p, lb, strlen(line))) != po_success) { + free(files->buf); + return t; + } } - poparser_finish(p); + if ((t = poparser_finish(p)) != po_success) { + free(files->buf); + return t; + } + + free(files->buf); return 0; } diff --git a/src/poparser.c b/src/poparser.c index faf7e27..dc4cf86 100644 --- a/src/poparser.c +++ b/src/poparser.c @@ -1,218 +1,381 @@ #include <ctype.h> -#include <assert.h> #include <stdlib.h> #include <string.h> #include <iconv.h> #include "poparser.h" #include "StringEscape.h" -#define streq(A, B) (!strcmp(A, B)) #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) -static unsigned fuzzymark = 0; -static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) { - enum po_entry result_type; - char *x, *y; - size_t start = (size_t) lp; - while(isspace(*lp) && lp < end) lp++; - if(lp[0] == '#') { - char *s; - if(s = strstr(lp, ", fuzzy")) { - if(fuzzymark != 0) fuzzymark++; - else fuzzymark=2; - } - inv: - *stringstart = 0; - return pe_invalid; - } else if((y = strstarts(lp, "msg"))) { - if((x = strstarts(y, "id")) && isspace(*x)) - result_type = pe_msgid; - else if ((x = strstarts(y, "id_plural")) && isspace(*x)) - result_type = pe_plural; - else if ((x = strstarts(y, "ctxt")) && isspace(*x)) - result_type = pe_ctxt; - else if ((x = strstarts(y, "str")) && (isspace(*x) || - (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x)))) - result_type = pe_msgstr; - else - goto inv; - while(isspace(*x) && x < end) x++; - if(*x != '"') abort(); - conv: - *stringstart = ((size_t) x - start) + 1; - } else if(lp[0] == '"') { - if(!(*info->charset)) { - if(x = strstr(lp, "charset=")) { - // charset=xxx\\n - int len = strlen(x+=8) - 4; - assert(len <= 11); - if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) { - memcpy(info->charset, x, len); - info->charset[len] = 0; - } - } - } - if(x = strstr(lp, "nplurals=")) - if(*(x+9) - '0') - info->nplurals = *(x+9) - '0'; - result_type = pe_str; - x = lp; - goto conv; - } else { - goto inv; - } - return result_type; -} - -/* expects a pointer to the first char after a opening " in a string, - * converts the string into convbuf, and returns the length of that string */ -static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) { - size_t result = 0; - char* e = x + strlen(x); - assert(e > x && e < end && *e == 0); - e--; - while(isspace(*e)) e--; - if(*e != '"') abort(); - *e = 0; - char *s; - if(*info->charset) { - iconv_t ret = iconv_open("UTF-8", info->charset); - if(ret != (iconv_t)-1) { - size_t a=end-x, b=a*4; - char mid[b], *midp=mid; - iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b); - if(s = strstr(mid, "charset=")) - memcpy(s+8, "UTF-8\\n\0", 8); - result = unescape(mid, convbuf, convbuflen); - // iconv doesnt recognize the encoding - } else result = unescape(x, convbuf, convbuflen); - } else result = unescape(x, convbuf, convbuflen); - return result; -} +static const char* sysdep_str[st_max]={ + [st_priu32] = "<PRIu32>", + [st_priu64] = "<PRIu64>", + [st_priumax] = "<PRIuMAX>", +}; +static const char* sysdep_repl[st_max][3]={ + [st_priu32] = {"\x2", "u", "lu"}, + [st_priu64] = {"\x2", "lu", "llu"}, + [st_priumax] = {"\x1", "ju"}, +}; void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) { + int cnt; + memset(p, 0, sizeof(struct po_parser)); p->buf = workbuf; p->bufsize = bufsize; p->cb = cb; - p->prev_type = pe_invalid; - p->prev_rtype = pe_invalid; - p->curr_len = 0; p->cbdata = cbdata; - *(p->info.charset) = 0; - // nplurals = 2 by default - p->info.nplurals = 2; - fuzzymark = 0; + p->hdr.nplurals = MAX_NPLURALS; + p->max_ctxt_len = 1; + p->max_id_len = 1; + p->max_plural_len = 1; + for (cnt = 0; cnt < MAX_NPLURALS; cnt++) + p->max_strlen[cnt] = 1; + p->strcnt = 0; + p->first = true; } -enum lineactions { - la_incr, - la_proc, - la_abort, - la_nop, - la_max, -}; +static inline enum po_error poparser_feed_hdr(struct po_parser *p, po_message_t msg) { + char *x, *y; + if (p->stage == ps_parse && p->first) { + if (msg->id_len) + return -po_invalid_entry; + + if ((x = strstr(msg->str[0], "charset="))) { + for (y = x; *y && !isspace(*y); y++); + memcpy(p->hdr.charset, x+8, y-x-8); + p->hdr.charset[y-x] = 0; + + p->cd = iconv_open("UTF-8", p->hdr.charset); + if (p->cd == (iconv_t)-1) { + p->cd = 0; + return -po_unsupported_charset; + } + } + + if ((x = strstr(msg->str[0], "nplurals="))) { + p->hdr.nplurals = *(x+9) - '0'; + } + + p->first = false; + } + + return po_success; +} + +static inline enum po_error poparser_clean(struct po_parser *p, po_message_t msg) { + enum po_error t; + int i; + + if (p->strcnt) { + msg->strlen[p->strcnt] = 0; + + if ((t = poparser_feed_hdr(p, msg)) != po_success) { + return t; + } + + // met a new block starting with msgid + if (p->cb) + p->cb(msg, p->cbdata); -/* return 0 on success */ -int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { - char *convbuf = p->buf; - size_t convbuflen = p->bufsize; - size_t strstart; - - static const enum lineactions action_tbl[pe_max][pe_max] = { - // pe_str will never be set as curr_type - [pe_str] = { - [pe_str] = la_abort, - [pe_msgid] = la_abort, - [pe_ctxt] = la_abort, - [pe_plural] = la_abort, - [pe_msgstr] = la_abort, - [pe_invalid] = la_abort, - }, - [pe_msgid] = { - [pe_str] = la_incr, - [pe_msgid] = la_abort, - [pe_ctxt] = la_abort, - [pe_plural] = la_proc, - [pe_msgstr] = la_proc, - [pe_invalid] = la_proc, - }, - [pe_ctxt] = { - [pe_str] = la_incr, - [pe_msgid] = la_proc, - [pe_ctxt] = la_abort, - [pe_plural] = la_abort, - [pe_msgstr] = la_abort, - [pe_invalid] = la_proc, - }, - [pe_plural] = { - [pe_str] = la_incr, - [pe_msgid] = la_abort, - [pe_ctxt] = la_abort, - [pe_plural] = la_abort, - [pe_msgstr] = la_proc, - [pe_invalid] = la_proc, - }, - [pe_msgstr] = { - [pe_str] = la_incr, - [pe_msgid] = la_proc, - [pe_ctxt] = la_proc, - [pe_plural] = la_abort, - [pe_msgstr] = la_proc, - [pe_invalid] = la_proc, - }, - [pe_invalid] = { - [pe_str] = la_nop, - [pe_msgid] = la_incr, - [pe_ctxt] = la_incr, - [pe_plural] = la_nop, - [pe_msgstr] = la_nop, - [pe_invalid] = la_nop, - }, - }; - - enum po_entry type; - - type = get_type_and_start(&p->info, line, line + buflen, &strstart); - if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort) - abort(); - if(type != pe_invalid && type != pe_str) - p->prev_rtype = type; - if(fuzzymark) { - if(type == pe_ctxt && fuzzymark == 1) fuzzymark--; - if(type == pe_msgid) fuzzymark--; - if(fuzzymark > 0) return 0; + for (i=0; i < st_max; i++) + msg->sysdep[i] = 0; + + msg->ctxt_len = 0; + msg->id_len = 0; + msg->plural_len = 0; + p->strcnt = 0; } - switch(action_tbl[p->prev_type][type]) { - case la_incr: - assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt); - p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); + + return po_success; +} + +enum po_error poparser_feed_line(struct po_parser *p, char* in, size_t in_len) { + char *line = in; + size_t line_len = in_len; + po_message_t msg = &p->msg; + int cnt = 0; + enum po_error t; + size_t len; + char *x, *y, *z; + + if (line_len == 0 || line[0] == '\n') { + // ignore blank lines + return po_success; + } else if (line[0] == '#') { + switch (line[1]) { + case ',': + x = &line[2]; + while (*x && (y = strpbrk(x, " ,\n"))) { + if (y != x && !memcmp(x, "fuzzy", y-x)) { + msg->flags |= PO_FUZZY; + } + x = y + strspn(y, " ,\n"); + } break; - case la_proc: - assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt); - p->info.text = convbuf; - p->info.textlen = p->curr_len; - p->info.type = p->prev_type; - p->cb(&p->info, p->cbdata); - if(type != pe_invalid) - p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen); - else - p->curr_len = 0; + case '.': + // extracted comments for translators, ignore + case ':': + // reference comments for translators, ignore + case '|': + // previous untranslated strings for translators, ignore + default: + // ignore normal comments + return po_success; + } + } else if (line[0] == '"') { + if ( (y = strrchr(x = &line[1], '"')) == NULL) + return -po_excepted_token; + + len = y - x; + *y = 0; + + if (p->cd) { + line = x; + line_len = len + 1; + x = p->buf; + len = p->bufsize; + if ( (len = iconv(p->cd, &line, &line_len, &x, &len)) == -1) + return -po_failed_iconv; + + x = p->buf; + } + + for (cnt = 0; cnt < st_max; cnt++) { + if (strstr(x, sysdep_str[cnt])) { + msg->sysdep[cnt] = sysdep_repl[cnt][0][0]; + } + } + + switch (p->previous) { + case po_str: + cnt = p->strcnt - 1; + if (p->stage == ps_parse) { + len = unescape(x, &msg->str[cnt][msg->strlen[cnt]], p->max_strlen[cnt]); + } + + msg->strlen[cnt] += len; + break; + case po_plural: + if (p->stage == ps_parse) { + len = unescape(x, &msg->plural[msg->plural_len], p->max_plural_len); + } + + msg->plural_len += len; break; - case la_nop: + case po_id: + if (p->stage == ps_parse) { + len = unescape(x, &msg->id[msg->id_len], p->max_id_len); + } + + msg->id_len += len; + break; + case po_ctxt: + if (p->stage == ps_parse) { + len = unescape(x, &msg->ctxt[msg->ctxt_len], p->max_ctxt_len); + } + + msg->ctxt_len += len; break; - case la_abort: default: - abort(); - // todo : return error code + return -po_invalid_entry; + } + } else if ((z = strstarts(line, "msg"))) { + if ( (x = strchr(z, '"')) == NULL) + return -po_excepted_token; + + if ( (y = strrchr(x+1, '"')) == NULL) + return -po_excepted_token; + + len = y - ++x; + *y = 0; + + if (p->cd) { + line = x; + line_len = len + 1; + x = p->buf; + len = p->bufsize; + if ( (len = iconv(p->cd, &line, &line_len, &x, &len)) == -1) + return -po_failed_iconv; + + x = p->buf; + } + + if ((y = strstarts(z, "ctxt")) && isspace(*y)) { + if ( (t = poparser_clean(p, msg)) != po_success) + return t; + + if (msg->id_len || msg->plural) + return -po_invalid_entry; + + for (cnt = 0; cnt < st_max; cnt++) { + if (strstr(x, sysdep_str[cnt])) { + msg->sysdep[cnt] = sysdep_repl[cnt][0][0]; + } + } + + if (p->stage == ps_parse) { + if (msg->ctxt == NULL) { + return -po_internal; + } + + len = unescape(x, msg->ctxt, p->max_ctxt_len); + } + + msg->ctxt_len = len; + p->previous = po_ctxt; + } else if ((y = strstarts(z, "id")) && isspace(*y)) { + if ( (t = poparser_clean(p, msg)) != po_success) + return t; + + if (msg->plural_len) + return -po_invalid_entry; + + for (cnt = 0; cnt < st_max; cnt++) { + if (strstr(x, sysdep_str[cnt])) { + msg->sysdep[cnt] = sysdep_repl[cnt][0][0]; + } + } + + if (p->stage == ps_parse) { + if (msg->id == NULL) { + return -po_internal; + } + + len = unescape(x, msg->id, p->max_id_len); + } + + msg->id_len = len; + p->previous = po_id; + } else if ((y = strstarts(z, "id_plural")) && isspace(*y)) { + if (!msg->id_len || p->strcnt) + return -po_invalid_entry; + + if (p->stage == ps_parse) { + if (msg->plural == NULL) { + return -po_internal; + } + + len = unescape(x, msg->plural, p->max_plural_len); + } + + msg->plural_len = len; + p->previous = po_plural; + } else if ((y = strstarts(z, "str"))) { + if (!msg->id_len && !p->first) + return -po_invalid_entry; + + if (isspace(*y)) { + if (p->strcnt || msg->plural_len) + return -po_invalid_entry; + + cnt = (p->strcnt = 1) - 1; + } else if (*y == '[') { + if (!msg->plural_len) + return -po_invalid_entry; + + if (y[2] != ']' || !isspace(y[3])) return -po_excepted_token; + + p->strcnt = (cnt = y[1] - '0') + 1; + + if (p->strcnt > p->hdr.nplurals) { + return -po_plurals_overflow; + } + } else { + return -po_excepted_token; + } + + if (p->stage == ps_parse) { + if (msg->str[cnt] == NULL) { + return -po_internal; + } + + len = unescape(x, msg->str[cnt], p->max_strlen[cnt]); + } + + msg->strlen[cnt] = len; + p->previous = po_str; + } else { + return -po_invalid_entry; + } } - if(type != pe_str) { - p->prev_type = type; + + if (p->stage == ps_size) { + if (p->max_strlen[cnt] < msg->strlen[cnt]) + p->max_strlen[cnt] = msg->strlen[cnt] + 1; + if (p->max_plural_len < msg->plural_len) + p->max_plural_len = msg->plural_len + 1; + if (p->max_id_len < msg->id_len) + p->max_id_len = msg->id_len + 1; + if (p->max_ctxt_len < msg->ctxt_len) + p->max_ctxt_len = msg->ctxt_len + 1; } - return 0; + + return po_success; } -int poparser_finish(struct po_parser *p) { - char empty[4] = ""; - return poparser_feed_line(p, empty, sizeof(empty)); +enum po_error poparser_finish(struct po_parser *p) { + size_t len; + int cnt; + enum po_error t; + po_message_t msg = &p->msg; + + if (p->stage == ps_size) { + if ( (t = poparser_clean(p, msg)) != po_success) + return t; + + len = p->max_ctxt_len; + len += p->max_id_len; + len += p->max_plural_len; + for (cnt = 0; cnt < MAX_NPLURALS; cnt++) + len += p->max_strlen[cnt]; + + memset(msg, 0, sizeof(struct po_message)); + msg->ctxt = (char*)malloc(len); + msg->id = msg->ctxt + p->max_ctxt_len; + msg->plural = msg->id + p->max_id_len; + msg->str[0] = msg->plural + p->max_plural_len; + for (cnt = 1; cnt < MAX_NPLURALS; cnt++) + msg->str[cnt] = msg->str[cnt-1] + p->max_strlen[cnt-1]; + + p->hdr.nplurals = 2; + } else { + if ( (t = poparser_clean(p, msg)) != po_success) + return t; + if (msg->ctxt) free(msg->ctxt); + if (p->cd) iconv_close(p->cd); + } + + if (p->stage < ps_parse) p->stage++; + + return po_success; +} + +size_t poparser_sysdep(const char *in, char *out, int cnt[]) { + const char *x, *y, *outs; + int n; + outs = out; + x = in; + + for (n=0; n < st_max;) { + if ((y = strstr(x, sysdep_str[n])) && *(y-1) == '%') { + if (outs) + memcpy(out, x, y-x); + out += y-x; + x = y + strlen(sysdep_str[n]); + + y = sysdep_repl[n][cnt[n]+1]; + if (outs) + memcpy(out, y, strlen(y)); + out += strlen(y); + + n = 0; + } else n++; + } + + if (outs) + memcpy(out, x, strlen(x) + 1); + out += strlen(x) + 1; + return out - outs; } diff --git a/src/poparser.h b/src/poparser.h index 29b7b16..1e4a589 100644 --- a/src/poparser.h +++ b/src/poparser.h @@ -1,41 +1,97 @@ #ifndef POPARSER_H #define POPARSER_H -#include <unistd.h> -enum po_entry { - pe_msgid = 0, - pe_plural, - pe_ctxt, - pe_msgstr, - pe_maxstr, - pe_str = pe_maxstr, - pe_invalid, - pe_max, +#include <iconv.h> +#include <stdbool.h> +#include <stdlib.h> + +#define MAX_NPLURALS 6 + +enum sysdep_types { + st_priu32 = 0, + st_priu64, + st_priumax, + st_max }; -struct po_info { - enum po_entry type; - char *text; +// make sure out has equal or more space than in +size_t poparser_sysdep(const char *in, char *out, int cnt[]); + +struct po_header { char charset[12]; - unsigned int nplurals; - size_t textlen; + unsigned nplurals; + // maybe parse the header later +}; + +#define PO_FUZZY 1u + +struct po_message { + char *ctxt; + char *id; + char *plural; + char* str[MAX_NPLURALS]; + + int sysdep[st_max]; + size_t ctxt_len; + size_t id_len; + size_t plural_len; + size_t strlen[MAX_NPLURALS]; + // h.......1.0 + // |-------|a| + // |.......|a| + int flags; }; +typedef struct po_message *po_message_t; -typedef int (*poparser_callback)(struct po_info* info, void* user); +typedef int (*poparser_callback)(po_message_t msg, void* user); + +enum po_stage { + // collect size of every msg + ps_size = 0, + // parse + ps_parse, + ps_max = ps_parse, +}; + +enum po_entry { + po_ctxt = 0, + po_id, + po_plural, + po_str, +}; struct po_parser { - struct po_info info; + struct po_header hdr; + struct po_message msg; + enum po_stage stage; + + // private parts + bool first; + iconv_t cd; + enum po_entry previous; + int strcnt; + size_t max_ctxt_len; + size_t max_id_len; + size_t max_plural_len; + size_t max_strlen[MAX_NPLURALS]; char *buf; size_t bufsize; - enum po_entry prev_type; - enum po_entry prev_rtype; - unsigned curr_len; poparser_callback cb; void *cbdata; }; +enum po_error { + po_success = 0, + po_unsupported_charset, + po_failed_iconv, + po_excepted_token, + po_plurals_overflow, + po_invalid_entry, + po_internal, +}; + void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata); -int poparser_feed_line(struct po_parser *p, char* line, size_t buflen); -int poparser_finish(struct po_parser *p); +enum po_error poparser_feed_line(struct po_parser *p, char* line, size_t buflen); +enum po_error poparser_finish(struct po_parser *p); #endif |