diff options
author | rofl0r <retnyg@gmx.net> | 2012-12-25 06:03:24 +0100 |
---|---|---|
committer | rofl0r <retnyg@gmx.net> | 2012-12-25 06:03:48 +0100 |
commit | 92e039f526ef7988a9b8cf500fd45a3a5b045ced (patch) | |
tree | eb6f9945eec57d64c0eaf0c2cc885e5acf4311c2 | |
parent | ddf6dfea27f4a7d57ed73a4f5a146b6b704e9103 (diff) | |
download | gettext-tiny-0.0.2.tar.gz |
fix assertion error on huge string found in gnumeric (tag: v0.0.2)
the culprit is:
gnumeric-1.10.17/po-functions/dz.po
basically the code was wrongly checking for the end of the convert
buffer, instead of the line buffer.
this is no problem per se because the line buffer gets reset more
often, but when the convert buffer is bigger (like after this change)
it wouldn't work correctly.
i put the convert_buf/convertbuf functions into a separate translation
unit and renamed them to escape and unescape to make it clearer
what they're doing. also new: size checks for the escape functions.
currently it will silently truncate the strings when the buffer
runs out of space. this is sufficient for no-op style, but should
be fixed at some point.
-rw-r--r-- | Makefile | 5 | ||||
-rw-r--r-- | src/StringEscape.c | 111 | ||||
-rw-r--r-- | src/StringEscape.h | 7 | ||||
-rw-r--r-- | src/msgfmt.c | 2 | ||||
-rw-r--r-- | src/msgmerge.c | 60 | ||||
-rw-r--r-- | src/poparser.c | 64 |
6 files changed, 132 insertions, 117 deletions
@@ -7,6 +7,7 @@ sysconfdir=$(prefix)/etc LIBSRC = $(sort $(wildcard libintl/*.c)) PROGSRC = $(sort $(wildcard src/*.c)) +PARSEROBJS = src/poparser.o src/StringEscape.o PROGOBJS = $(PROGSRC:.c=.o) LIBOBJS = $(LIBSRC:.c=.o) OBJS = $(PROGOBJS) $(LIBOBJS) @@ -46,10 +47,10 @@ libintl.a: $(LIBOBJS) $(RANLIB) $@ msgmerge: $(OBJS) - $(CC) $(LDFLAGS) -static -o $@ src/msgmerge.o src/poparser.o + $(CC) $(LDFLAGS) -static -o $@ src/msgmerge.o $(PARSEROBJS) msgfmt: $(OBJS) - $(CC) $(LDFLAGS) -static -o $@ src/msgfmt.o src/poparser.o + $(CC) $(LDFLAGS) -static -o $@ src/msgfmt.o $(PARSEROBJS) xgettext: cp src/xgettext.sh ./xgettext diff --git a/src/StringEscape.c b/src/StringEscape.c new file mode 100644 index 0000000..3a9ddd6 --- /dev/null +++ b/src/StringEscape.c @@ -0,0 +1,111 @@ +#include <stddef.h> + +//FIXME out gets silently truncated if outsize is too small + +size_t escape(char* in, char* out, size_t outsize) { + size_t l = 0; + while(*in && l + 3 < outsize) { + switch(*in) { + case '\n': + *out++ = '\\'; + l++; + *out = 'n'; + break; + case '\r': + *out++ = '\\'; + l++; + *out = 'r'; + break; + case '\t': + *out++ = '\\'; + l++; + *out = 't'; + break; + case '\\': + *out++ = '\\'; + l++; + *out = '\\'; + break; + case '"': + *out++ = '\\'; + l++; + *out = '"'; + break; + case '\v': + *out++ = '\\'; + l++; + *out = '\v'; + break; + case '\?': + *out++ = '\\'; + l++; + *out = '\?'; + break; + case '\f': + *out++ = '\\'; + l++; + *out = '\f'; + break; + default: + *out = *in; + } + in++; + out++; + l++; + } + *out = 0; + return l; +} +#include <assert.h> +#include <stdlib.h> +size_t unescape(char* in, char *out, size_t outsize) { + size_t l = 0; + while(*in && l + 2 < outsize) { + switch (*in) { + case '\\': + ++in; + assert(*in); + switch(*in) { + case 'n': + *out='\n'; + break; + case 'r': + *out='\r'; + break; + case 't': + *out='\t'; + break; + case '\\': + *out='\\'; + break; + case '"': + *out='"'; + break; + case 'v': + *out='\v'; + break; + case '\?': + *out = 
'\?'; + break; + case 'f': + *out = '\f'; + break; + case '\'': + *out = '\''; + break; + // FIXME add handling of hex and octal + default: + abort(); + } + break; + default: + *out=*in; + } + in++; + out++; + l++; + } + *out = 0; + return l; +} + diff --git a/src/StringEscape.h b/src/StringEscape.h new file mode 100644 index 0000000..fc76482 --- /dev/null +++ b/src/StringEscape.h @@ -0,0 +1,7 @@ +#ifndef STRINGESCAPE_H +#define STRINGESCAPE_H +#include <stddef.h> +size_t escape(char* in, char *out, size_t outsize); +size_t unescape(char* in, char *out, size_t outsize); +//RcB: DEP "StringEscape.c" +#endif diff --git a/src/msgfmt.c b/src/msgfmt.c index 3961fa3..bf5f343 100644 --- a/src/msgfmt.c +++ b/src/msgfmt.c @@ -129,7 +129,7 @@ int process_line_callback(struct po_info* info, void* user) { int process(FILE *in, FILE *out) { struct mo_hdr mohdr = def_hdr; char line[4096]; char *lp; - char convbuf[4096]; + char convbuf[16384]; struct callbackdata d = { .num = { diff --git a/src/msgmerge.c b/src/msgmerge.c index 15a3e2e..977f168 100644 --- a/src/msgmerge.c +++ b/src/msgmerge.c @@ -6,6 +6,7 @@ #include <ctype.h> #include <assert.h> #include "poparser.h" +#include "StringEscape.h" __attribute__((noreturn)) static void syntax(void) { @@ -37,61 +38,6 @@ struct fiLes { FILE *compend; }; -size_t convert_buf(char* in, char* out) { - size_t l = 0; - while(*in) { - switch(*in) { - case '\n': - *out++ = '\\'; - l++; - *out = 'n'; - break; - case '\r': - *out++ = '\\'; - l++; - *out = 'r'; - break; - case '\t': - *out++ = '\\'; - l++; - *out = 't'; - break; - case '\\': - *out++ = '\\'; - l++; - *out = '\\'; - break; - case '"': - *out++ = '\\'; - l++; - *out = '"'; - break; - case '\v': - *out++ = '\\'; - l++; - *out = '\v'; - break; - case '\?': - *out++ = '\\'; - l++; - *out = '\?'; - break; - case '\f': - *out++ = '\\'; - l++; - *out = '\f'; - break; - default: - *out = *in; - } - in++; - out++; - l++; - } - *out = 0; - return l; -} - /* currently we only output input 
strings as output strings * i.e. there is no translation lookup at all */ int process_line_callback(struct po_info* info, void* user) { @@ -99,7 +45,7 @@ int process_line_callback(struct po_info* info, void* user) { FILE* out = (FILE*) user; size_t l; if(info->type == pe_msgid) { - l = convert_buf(info->text, convbuf); + l = escape(info->text, convbuf, sizeof(convbuf)); fprintf(out, "msgid \"%s\"\nmsgstr \"%s\"\n", convbuf, convbuf); } return 0; @@ -108,7 +54,7 @@ int process_line_callback(struct po_info* info, void* user) { int process(struct fiLes *files, int update, int backup) { (void) update; (void) backup; struct po_parser pb, *p = &pb; - char line[4096], conv[4096], *lb; + char line[4096], conv[8192], *lb; poparser_init(p, conv, sizeof(conv), process_line_callback, files->out); while((lb = fgets(line, sizeof(line), files->pot))) { poparser_feed_line(p, lb, sizeof(line) - (size_t)(lb - line)); diff --git a/src/poparser.c b/src/poparser.c index 87d7a5c..e677613 100644 --- a/src/poparser.c +++ b/src/poparser.c @@ -3,61 +3,11 @@ #include <stdlib.h> #include <string.h> #include "poparser.h" +#include "StringEscape.h" #define streq(A, B) (!strcmp(A, B)) #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? 
NULL : (S + (sizeof(W) - 1))) -static size_t convertbuf(char* in, char *out) { - size_t l = 0; - while(*in) { - switch (*in) { - case '\\': - ++in; - assert(*in); - switch(*in) { - case 'n': - *out='\n'; - break; - case 'r': - *out='\r'; - break; - case 't': - *out='\t'; - break; - case '\\': - *out='\\'; - break; - case '"': - *out='"'; - break; - case 'v': - *out='\v'; - break; - case '\?': - *out = '\?'; - break; - case 'f': - *out = '\f'; - break; - case '\'': - *out = '\''; - break; - // FIXME add handling of hex and octal - default: - abort(); - } - break; - default: - *out=*in; - } - in++; - out++; - l++; - } - *out = 0; - return l; -} - static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart) { enum po_entry result_type; char *x, *y; @@ -92,7 +42,7 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart /* expects a pointer to the first char after a opening " in a string, * converts the string into convbuf, and returns the length of that string */ -static size_t get_length_and_convert(char* x, char* end, char* convbuf) { +static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) { size_t result = 0; char* e = x + strlen(x); assert(e > x && e < end && *e == 0); @@ -100,7 +50,7 @@ static size_t get_length_and_convert(char* x, char* end, char* convbuf) { while(isspace(*e)) e--; if(*e != '"') abort(); *e = 0; - result = convertbuf(x, convbuf); + result = unescape(x, convbuf, convbuflen); return result; } @@ -124,8 +74,8 @@ enum lineactions { /* return 0 on success */ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { - char *lp = line; char *convbuf = p->buf; + size_t convbuflen = p->bufsize; size_t strstart; static const enum lineactions action_tbl[pe_max][pe_max] = { @@ -158,11 +108,11 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { enum po_entry type; - type = get_type_and_start(lp, line + buflen, &strstart); + type = 
get_type_and_start(line, line + buflen, &strstart); switch(action_tbl[p->prev_type][type]) { case la_incr: assert(type == pe_msgid || type == pe_msgstr || type == pe_str); - p->curr_len += get_length_and_convert(lp + strstart, line + buflen - p->curr_len, convbuf + p->curr_len); + p->curr_len += get_length_and_convert(line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); break; case la_proc: assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr); @@ -171,7 +121,7 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { p->info.type = p->prev_type; p->cb(&p->info, p->cbdata); if(type != pe_invalid) - p->curr_len = get_length_and_convert(lp + strstart, line + buflen, convbuf); + p->curr_len = get_length_and_convert(line + strstart, line + buflen, convbuf, convbuflen); else p->curr_len = 0; break; |