diff options
author | rofl0r <retnyg@gmx.net> | 2012-09-18 16:00:05 +0200 |
---|---|---|
committer | rofl0r <retnyg@gmx.net> | 2012-09-18 16:05:42 +0200 |
commit | 4729eeff907e69616d07a2fdb405b4ddbb171521 (patch) | |
tree | 47dbb70324cb7f8027ff53d67ab08aae75f0aa26 | |
parent | 1c060deff92f6f360021a4de49db72c5ac322230 (diff) | |
download | gettext-tiny-4729eeff907e69616d07a2fdb405b4ddbb171521.tar.gz |
rip out poparser from msgfmt
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | src/msgfmt.c | 275 | ||||
-rw-r--r-- | src/poparser.c | 181 | ||||
-rw-r--r-- | src/poparser.h | 36 |
4 files changed, 290 insertions, 211 deletions
@@ -5,11 +5,12 @@ libdir=$(prefix)/lib sysconfdir=$(prefix)/etc LIBSRC = $(sort $(wildcard libintl/*.c)) +PROGSRC = $(sort $(wildcard src/*.c)) -SRCS = $(LIBSRC) -OBJS = $(SRCS:.c=.o) +PROGOBJS = $(PROGSRC:.c=.o) LIBOBJS = $(LIBSRC:.c=.o) + HEADERS = libintl.h ALL_INCLUDES = $(HEADERS) @@ -43,8 +44,8 @@ libintl.a: $(LIBOBJS) $(AR) rc $@ $(LIBOBJS) $(RANLIB) $@ -msgfmt: - $(CC) $(BUILDCFLAGS) -static -o $@ src/$@.c +$(ALL_TOOLS): $(PROGOBJS) + $(CC) $(LDFLAGS) -static -o $@ $< src/poparser.o $(DESTDIR)$(libdir)/%.a: %.a diff --git a/src/msgfmt.c b/src/msgfmt.c index 0df027d..4b7d37c 100644 --- a/src/msgfmt.c +++ b/src/msgfmt.c @@ -5,6 +5,7 @@ #include <string.h> #include <ctype.h> #include <assert.h> +#include "poparser.h" void syntax(void) { fprintf(stdout, @@ -43,97 +44,6 @@ const struct mo_hdr def_hdr = { 0, }; -size_t convertbuf(char* in, char *out) { - size_t l = 0; - while(*in) { - switch (*in) { - case '\\': - ++in; - assert(*in); - switch(*in) { - case 'n': - *out='\n'; - break; - case 'r': - *out='\r'; - break; - case 't': - *out='\t'; - break; - case '\\': - *out='\\'; - break; - case '"': - *out='"'; - break; - default: - abort(); - } - break; - default: - *out=*in; - } - in++; - out++; - l++; - } - *out = 0; - return l; -} - -enum plr_type { - plr_msgid = 0, - plr_msgstr, - plr_str, - plr_invalid, - plr_max, -}; - -enum plr_type get_type_and_start(char* lp, char* end, size_t *stringstart) { - enum plr_type result_type; - char *x, *y; - size_t start = (size_t) lp; - while(isspace(*lp) && lp < end) lp++; - if(lp[0] == '#') { - inv: - *stringstart = 0; - return plr_invalid; - } - if((y = strstarts(lp, "msg"))) { - if((x = strstarts(y, "id")) && (isspace(*x) || ((x = strstarts(x, "_plural")) && isspace(*x)))) - result_type = plr_msgid; - else if ((x = strstarts(y, "str")) && (isspace(*x) || - (x[0] == '[' && (x[1] == '0' || x[1] == '1') && x[2] == ']' && (x += 3) && isspace(*x)))) - result_type = plr_msgstr; - else - goto inv; - while(isspace(*x) && x < end) x++; - if(*x != '"') abort(); - conv: - *stringstart = ((size_t) x - start) + 1; - } else if(*lp == '"') { - result_type = plr_str; - x = lp; - goto conv; - } else { - goto inv; - } - return result_type; -} - -/* expects a pointer to the first char after a opening " in a string, - * converts the string into convbuf, and returns the length of that string */ -size_t get_length_and_convert(char* x, char* end, char* convbuf) { - size_t result = 0; - char* e = x + strlen(x); - assert(e > x && e < end && *e == 0); - e--; - while(isspace(*e)) e--; - if(*e != '"') abort(); - *e = 0; - result = convertbuf(x, convbuf); - return result; -} // pass 0: collect numbers of strings, calculate size and offsets for tables // print header @@ -152,143 +62,94 @@ enum passes { pass_max, }; -enum lineactions { - la_incr, - la_proc, - la_abort, - la_nop, - la_max, +struct callbackdata { + enum passes pass; + unsigned off; + FILE* out; + unsigned num[pe_maxstr]; + unsigned len[pe_maxstr]; }; + +int process_line_callback(struct po_info* info, void* user) { + struct callbackdata *d = (struct callbackdata *) user; + assert(info->type == pe_msgid || info->type == pe_msgstr); + switch(d->pass) { + case pass_collect_sizes: + d->num[info->type] += 1; + d->len[info->type] += info->textlen; + break; + case pass_print_string_offsets: + if(info->type == pe_msgstr) break; + write_offsets: + // print length of current string + fwrite(&info->textlen, sizeof(unsigned), 1, d->out); + // print offset of current string + fwrite(&d->off, sizeof(unsigned), 1, d->out); + d->off += info->textlen + 1; + break; + case pass_print_translation_offsets: + if(info->type == pe_msgid) break; + goto write_offsets; + case pass_print_strings: + if(info->type == pe_msgstr) break; + write_string: + fwrite(info->text, info->textlen + 1, 1, d->out); + break; + case pass_print_translations: + if(info->type == pe_msgid) break; + goto write_string; + break; + default: + abort(); + } + return 0; +} + int process(FILE *in, FILE *out) { struct mo_hdr mohdr = def_hdr; char line[4096]; char *lp; char convbuf[4096]; - unsigned off; - enum plr_type prev_type = plr_invalid; - unsigned curr_len = 0; - size_t strstart; - - unsigned num[plr_max] = { - [plr_msgid] = 0, - [plr_msgstr] = 0, - }; - unsigned len[plr_max] = { - [plr_msgid] = 0, - [plr_msgstr] = 0, - }; - static const enum lineactions action_tbl[plr_max][plr_max] = { - // plr_str will never be set as curr_type - [plr_str] = { - [plr_str] = la_abort, - [plr_msgid] = la_abort, - [plr_msgstr] = la_abort, - [plr_invalid] = la_abort, - }, - [plr_msgid] = { - [plr_str] = la_incr, - [plr_msgid] = la_proc, - [plr_msgstr] = la_proc, - [plr_invalid] = la_proc, - }, - [plr_msgstr] = { - [plr_str] = la_incr, - [plr_msgid] = la_proc, - [plr_msgstr] = la_proc, - [plr_invalid] = la_proc, + + struct callbackdata d = { + .num = { + [pe_msgid] = 0, + [pe_msgstr] = 0, }, - [plr_invalid] = { - [plr_str] = la_abort, - [plr_msgid] = la_incr, - [plr_msgstr] = la_incr, - [plr_invalid] = la_nop, + .len = { + [pe_msgid] = 0, + [pe_msgstr] = 0, }, + .off = 0, + .out = out, + .pass = pass_first, }; - - // increased in pass 0 to point to the strings section - // increased in pass 1 to point to the translation section - enum passes pass; - enum plr_type type; - int finished; + + struct po_parser pb, *p = &pb; mohdr.off_tbl_trans = mohdr.off_tbl_org; - for(pass = pass_first; pass < pass_max; pass++) { - if(pass == pass_second) { + for(d.pass = pass_first; d.pass < pass_max; d.pass++) { + if(d.pass == pass_second) { // start of second pass: // check that data gathered in first pass is consistent - if(num[plr_msgid] != num[plr_msgstr]) abort(); + if(d.num[pe_msgid] != d.num[pe_msgstr]) abort(); // calculate header fields from len and num arrays - mohdr.numstring = num[plr_msgid]; + mohdr.numstring = d.num[pe_msgid]; mohdr.off_tbl_org = sizeof(struct mo_hdr); - mohdr.off_tbl_trans = mohdr.off_tbl_org + num[plr_msgid] * (sizeof(unsigned)*2); + mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); // print header fwrite(&mohdr, sizeof(mohdr), 1, out); // set offset startvalue - off = mohdr.off_tbl_trans + num[plr_msgstr] * (sizeof(unsigned)*2); + d.off = mohdr.off_tbl_trans + d.num[pe_msgstr] * (sizeof(unsigned)*2); } - finished = 0; + poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); + while((lp = fgets(line, sizeof(line), in))) { - doline: - type = get_type_and_start(lp, line + sizeof(line), &strstart); - switch(action_tbl[prev_type][type]) { - case la_incr: - assert(type == plr_msgid || type == plr_msgstr || type == plr_str); - curr_len += get_length_and_convert(lp + strstart, line + sizeof(line) - curr_len, convbuf + curr_len); - break; - case la_proc: - assert(prev_type == plr_msgid || prev_type == plr_msgstr); - switch(pass) { - case pass_collect_sizes: - num[prev_type] += 1; - len[prev_type] += curr_len; - break; - case pass_print_string_offsets: - if(prev_type == plr_msgstr) break; - write_offsets: - // print length of current string - fwrite(&curr_len, sizeof(unsigned), 1, out); - // print offset of current string - fwrite(&off, sizeof(unsigned), 1, out); - off += curr_len + 1; - break; - case pass_print_translation_offsets: - if(prev_type == plr_msgid) break; - goto write_offsets; - case pass_print_strings: - if(prev_type == plr_msgstr) break; - write_string: - fwrite(convbuf, curr_len + 1, 1, out); - break; - case pass_print_translations: - if(prev_type == plr_msgid) break; - goto write_string; - break; - default: - abort(); - } - if(type != plr_invalid) - curr_len = get_length_and_convert(lp + strstart, line + sizeof(line), convbuf); - else - curr_len = 0; - break; - case la_nop: - break; - case la_abort: - default: - abort(); - } - if(type != plr_str) { - prev_type = type; - } - } - if(!finished) { - // we need to make an extra pass of type invalid to trigger - // processing of the last string. - lp = line; - *lp = 0; - finished = 1; - goto doline; + poparser_feed_line(p, lp, sizeof(line)); } + + poparser_finish(p); + fseek(in, 0, SEEK_SET); } return 0; diff --git a/src/poparser.c b/src/poparser.c new file mode 100644 index 0000000..e44e6e3 --- /dev/null +++ b/src/poparser.c @@ -0,0 +1,181 @@ +#include <ctype.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include "poparser.h" + +#define streq(A, B) (!strcmp(A, B)) +#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) + +static size_t convertbuf(char* in, char *out) { + size_t l = 0; + while(*in) { + switch (*in) { + case '\\': + ++in; + assert(*in); + switch(*in) { + case 'n': + *out='\n'; + break; + case 'r': + *out='\r'; + break; + case 't': + *out='\t'; + break; + case '\\': + *out='\\'; + break; + case '"': + *out='"'; + break; + default: + abort(); + } + break; + default: + *out=*in; + } + in++; + out++; + l++; + } + *out = 0; + return l; +} + +static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart) { + enum po_entry result_type; + char *x, *y; + size_t start = (size_t) lp; + while(isspace(*lp) && lp < end) lp++; + if(lp[0] == '#') { + inv: + *stringstart = 0; + return pe_invalid; + } + if((y = strstarts(lp, "msg"))) { + if((x = strstarts(y, "id")) && (isspace(*x) || ((x = strstarts(x, "_plural")) && isspace(*x)))) + result_type = pe_msgid; + else if ((x = strstarts(y, "str")) && (isspace(*x) || + (x[0] == '[' && (x[1] == '0' || x[1] == '1') && x[2] == ']' && (x += 3) && isspace(*x)))) + result_type = pe_msgstr; + else + goto inv; + while(isspace(*x) && x < end) x++; + if(*x != '"') abort(); + conv: + *stringstart = ((size_t) x - start) + 1; + } else if(*lp == '"') { + result_type = pe_str; + x = lp; + goto conv; + } else { + goto inv; + } + return result_type; +} + +/* expects a pointer to the first char after a opening " in a string, + * converts the string into convbuf, and returns the length of that string */ +static size_t get_length_and_convert(char* x, char* end, char* convbuf) { + size_t result = 0; + char* e = x + strlen(x); + assert(e > x && e < end && *e == 0); + e--; + while(isspace(*e)) e--; + if(*e != '"') abort(); + *e = 0; + result = convertbuf(x, convbuf); + return result; +} + + +void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) { + p->buf = workbuf; + p->bufsize = bufsize; + p->cb = cb; + p->prev_type = pe_invalid; + p->curr_len = 0; + p->cbdata = cbdata; +} + +enum lineactions { + la_incr, + la_proc, + la_abort, + la_nop, + la_max, +}; + +/* return 0 on success */ +int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { + char *lp = line; + char *convbuf = p->buf; + size_t strstart; + + static const enum lineactions action_tbl[pe_max][pe_max] = { + // pe_str will never be set as curr_type + [pe_str] = { + [pe_str] = la_abort, + [pe_msgid] = la_abort, + [pe_msgstr] = la_abort, + [pe_invalid] = la_abort, + }, + [pe_msgid] = { + [pe_str] = la_incr, + [pe_msgid] = la_proc, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_msgstr] = { + [pe_str] = la_incr, + [pe_msgid] = la_proc, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_invalid] = { + [pe_str] = la_abort, + [pe_msgid] = la_incr, + [pe_msgstr] = la_incr, + [pe_invalid] = la_nop, + }, + }; + + enum po_entry type; + + type = get_type_and_start(lp, line + buflen, &strstart); + switch(action_tbl[p->prev_type][type]) { + case la_incr: + assert(type == pe_msgid || type == pe_msgstr || type == pe_str); + p->curr_len += get_length_and_convert(lp + strstart, line + buflen - p->curr_len, convbuf + p->curr_len); + break; + case la_proc: + assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr); + p->info.text = convbuf; + p->info.textlen = p->curr_len; + p->info.type = p->prev_type; + p->cb(&p->info, p->cbdata); + if(type != pe_invalid) + p->curr_len = get_length_and_convert(lp + strstart, line + buflen, convbuf); + else + p->curr_len = 0; + break; + case la_nop: + break; + case la_abort: + default: + abort(); + // todo : return error code + } + if(type != pe_str) { + p->prev_type = type; + } + return 0; +} + +int poparser_finish(struct po_parser *p) { + char empty[4] = ""; + return poparser_feed_line(p, empty, sizeof(empty)); +} diff --git a/src/poparser.h b/src/poparser.h new file mode 100644 index 0000000..e29594f --- /dev/null +++ b/src/poparser.h @@ -0,0 +1,36 @@ +#ifndef POPARSER_H +#define POPARSER_H +#include <unistd.h> + +enum po_entry { + pe_msgid = 0, + pe_msgstr, + pe_maxstr, + pe_str = pe_maxstr, + pe_invalid, + pe_max, +}; + +struct po_info { + enum po_entry type; + char *text; + size_t textlen; +}; + +typedef int (*poparser_callback)(struct po_info* info, void* user); + +struct po_parser { + struct po_info info; + char *buf; + size_t bufsize; + enum po_entry prev_type; + unsigned curr_len; + poparser_callback cb; + void *cbdata; +}; + +void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata); +int poparser_feed_line(struct po_parser *p, char* line, size_t buflen); +int poparser_finish(struct po_parser *p); + +#endif |