summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorxhe <xw897002528@gmail.com>2020-01-22 14:42:04 +0800
committerxhe <xw897002528@gmail.com>2020-01-22 22:03:27 +0800
commitc83067e03d2929f358dbdc10bea49d3df0e5e78e (patch)
tree208d3c4e98618f530b1fc9bb37d6a563178e2468
parentadaa9c64921e80f2b8dd3610ffb508618b9204f3 (diff)
downloadgettext-tiny-c83067e03d2929f358dbdc10bea49d3df0e5e78e.tar.gz
poparser: support feeding anything [1/2]
This is the first patch. It mainly handles cases like `msgid "ss" "gg"`, which is valid input (confirmed with GNU msgfmt). Just like a C compiler, we should concatenate the adjacent strings and treat them as `"ssgg"`. Previously, feed_line assumed there was only one `"xxx"` string per line. For example, when it parsed `msgid "ss"`, the whole line was marked as parsed, so `"gg"` was ignored. Now the parser runs as a loop that consumes exactly the part it has parsed, so that all strings on the line are consumed.
-rw-r--r--src/poparser.c402
1 files changed, 206 insertions, 196 deletions
diff --git a/src/poparser.c b/src/poparser.c
index d7aafb1..2780372 100644
--- a/src/poparser.c
+++ b/src/poparser.c
@@ -112,6 +112,7 @@ static inline enum po_error poparser_clean(struct po_parser *p, po_message_t msg
enum po_error poparser_feed_line(struct po_parser *p, char* in, size_t in_len) {
char *line = in;
+ size_t line_pos;
size_t line_len = in_len;
po_message_t msg = &p->msg;
int cnt = 0;
@@ -119,132 +120,69 @@ enum po_error poparser_feed_line(struct po_parser *p, char* in, size_t in_len) {
size_t len;
char *x, *y, *z;
- if (line_len == 0 || line[0] == '\n') {
- // ignore blank lines
- return po_success;
- } else if (line[0] == '#') {
- if (p->previous == po_str) {
- if ( (t = poparser_clean(p, msg)) != po_success)
- return t;
- }
+ // if we need to conv encodings
+ if (p->cd) {
+ x = p->buf;
+ len = p->bufsize;
+ if (iconv(p->cd, &line, &line_len, &x, &len) == (size_t)-1)
+ return -po_failed_iconv;
- switch (line[1]) {
- case ',':
- x = &line[2];
- while (*x && (y = strpbrk(x, " ,\n"))) {
- if (y != x && !memcmp(x, "fuzzy", y-x)) {
- msg->flags |= PO_FUZZY;
- }
- x = y + strspn(y, " ,\n");
- }
- break;
- case '.':
- // extracted comments for translators, ignore
- case ':':
- // reference comments for translators, ignore
- case '|':
- // previous untranslated strings for translators, ignore
- default:
- // ignore normal comments
- return po_success;
- }
- } else if (line[0] == '"') {
- if ( (y = strrchr(x = &line[1], '"')) == NULL)
- return -po_excepted_token;
-
- len = y - x;
- *y = 0;
-
- if (p->cd) {
- line = x;
- line_len = len + 1;
- x = p->buf;
- len = p->bufsize;
- if (iconv(p->cd, &line, &line_len, &x, &len) == (size_t)-1)
- return -po_failed_iconv;
-
- if (line_len != 0)
- return -po_failed_iconv;
-
- len = x - p->buf;
- x = p->buf;
- }
+ if (line_len != 0)
+ return -po_failed_iconv;
- for (cnt = 0; cnt < st_max; cnt++) {
- if (strstr(x, sysdep_str[cnt])) {
- msg->sysdep |= sysdep[cnt];
- }
- }
-
- switch (p->previous) {
- case po_str:
- if ((t = poparser_feed_hdr(p, x)) != po_success) {
- return t;
- }
-
- cnt = p->strcnt - 1;
- if (p->stage == ps_parse) {
- len = unescape(x, &msg->str[cnt][msg->strlen[cnt]], p->max_strlen[cnt]);
- }
+ line_len = x - p->buf;
+ line = p->buf;
+ }
- msg->strlen[cnt] += len;
+ for (line_pos=0; line_pos < line_len;) {
+ switch (line[line_pos]) {
+ case '\n':
+ case ' ':
+ line_pos++;
break;
- case po_plural:
- if (p->stage == ps_parse) {
- len = unescape(x, &msg->plural[msg->plural_len], p->max_plural_len);
+ case '#':
+ if (p->previous == po_str) {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
}
- msg->plural_len += len;
- break;
- case po_id:
- if (p->stage == ps_parse) {
- len = unescape(x, &msg->id[msg->id_len], p->max_id_len);
+ switch (line[line_pos+1]) {
+ case ',':
+ x = &line[line_pos+2];
+ while (*x && (y = strpbrk(x, " ,\n"))) {
+ if (y != x && !memcmp(x, "fuzzy", y-x)) {
+ msg->flags |= PO_FUZZY;
+ }
+ x = y + strspn(y, " ,\n");
+ }
+ break;
+ case '.':
+ // extracted comments for translators, ignore
+ case ':':
+ // reference comments for translators, ignore
+ case '|':
+ // previous untranslated strings for translators, ignore
+ default:
+ // ignore normal comments
+ break;
}
- msg->id_len += len;
+ // whole line is commented
+ line_pos = line_len;
break;
- case po_ctxt:
- if (p->stage == ps_parse) {
- len = unescape(x, &msg->ctxt[msg->ctxt_len], p->max_ctxt_len);
+ case '"':
+ y = x = &line[line_pos+1];
+ while (true) {
+ if ( (y = strchr(y, '"')) == NULL)
+ return -po_excepted_token;
+
+ // only if it's not an escaped "
+ if (*(y-1) != '\\') break;
}
- msg->ctxt_len += len;
- break;
- default:
- return -po_invalid_entry;
- }
- } else if ((z = strstarts(line, "msg"))) {
- if ( (x = strchr(z, '"')) == NULL)
- return -po_excepted_token;
-
- if ( (y = strrchr(x+1, '"')) == NULL)
- return -po_excepted_token;
-
- len = y - ++x;
- *y = 0;
-
- if (p->cd) {
- line = x;
- line_len = len + 1;
- x = p->buf;
- len = p->bufsize;
-
- if (iconv(p->cd, &line, &line_len, &x, &len) == (size_t)-1)
- return -po_failed_iconv;
-
- if (line_len != 0)
- return -po_failed_iconv;
-
- len = x - p->buf;
- x = p->buf;
- }
-
- if ((y = strstarts(z, "ctxt")) && isspace(*y)) {
- if ( (t = poparser_clean(p, msg)) != po_success)
- return t;
-
- if (msg->id_len || msg->plural_len)
- return -po_invalid_entry;
+ len = y - x;
+ *y = 0;
+ line_pos += len + 2;
for (cnt = 0; cnt < st_max; cnt++) {
if (strstr(x, sysdep_str[cnt])) {
@@ -252,105 +190,177 @@ enum po_error poparser_feed_line(struct po_parser *p, char* in, size_t in_len) {
}
}
- if (p->stage == ps_parse) {
- if (msg->ctxt == NULL) {
- return -po_internal;
+ switch (p->previous) {
+ case po_str:
+ if ((t = poparser_feed_hdr(p, x)) != po_success) {
+ return t;
}
- len = unescape(x, msg->ctxt, p->max_ctxt_len);
- }
-
- msg->ctxt_len = len;
- p->previous = po_ctxt;
- } else if ((y = strstarts(z, "id")) && isspace(*y)) {
- if ( (t = poparser_clean(p, msg)) != po_success)
- return t;
-
- if (msg->plural_len)
- return -po_invalid_entry;
-
- for (cnt = 0; cnt < st_max; cnt++) {
- if (strstr(x, sysdep_str[cnt])) {
- msg->sysdep |= sysdep[cnt];
+ cnt = p->strcnt - 1;
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->str[cnt][msg->strlen[cnt]], p->max_strlen[cnt]);
}
- }
- if (p->stage == ps_parse) {
- if (msg->id == NULL) {
- return -po_internal;
+ msg->strlen[cnt] += len;
+ break;
+ case po_plural:
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->plural[msg->plural_len], p->max_plural_len);
}
- len = unescape(x, msg->id, p->max_id_len);
- }
-
- msg->id_len = len;
- p->previous = po_id;
- } else if ((y = strstarts(z, "id_plural")) && isspace(*y)) {
- if (!msg->id_len || p->strcnt)
- return -po_invalid_entry;
-
- if (p->stage == ps_parse) {
- if (msg->plural == NULL) {
- return -po_internal;
+ msg->plural_len += len;
+ break;
+ case po_id:
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->id[msg->id_len], p->max_id_len);
}
- len = unescape(x, msg->plural, p->max_plural_len);
- }
+ msg->id_len += len;
+ break;
+ case po_ctxt:
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->ctxt[msg->ctxt_len], p->max_ctxt_len);
+ }
- msg->plural_len = len;
- p->previous = po_plural;
- } else if ((y = strstarts(z, "str"))) {
- if (!msg->id_len && !p->first)
+ msg->ctxt_len += len;
+ break;
+ default:
return -po_invalid_entry;
+ }
- if (isspace(*y)) {
- if (p->strcnt || msg->plural_len)
- return -po_invalid_entry;
-
- cnt = (p->strcnt = 1) - 1;
- } else if (*y == '[') {
- if (!msg->plural_len)
- return -po_invalid_entry;
-
- if (y[2] != ']' || !isspace(y[3])) return -po_excepted_token;
+ break;
+ default:
+ if ((z = strstarts(&line[line_pos], "msg"))) {
+ if ( (x = strchr(z, '"')) == NULL)
+ return -po_excepted_token;
- p->strcnt = (cnt = y[1] - '0') + 1;
+ y = ++x;
+ while (true) {
+ if ( (y = strchr(y, '"')) == NULL)
+ return -po_excepted_token;
- if (p->strict && p->strcnt > p->hdr.nplurals) {
- return -po_plurals_overflow;
+ // only if it's not an escaped "
+ if (*(y-1) != '\\') break;
}
- } else {
- return -po_excepted_token;
- }
- if ((t = poparser_feed_hdr(p, x)) != po_success) {
- return t;
- }
-
- if (p->stage == ps_parse) {
- if (msg->str[cnt] == NULL) {
- return -po_internal;
+ len = y - x;
+ *y = 0;
+ line_pos += y - &line[line_pos] + 1;
+
+ if ((y = strstarts(z, "ctxt")) && isspace(*y)) {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
+
+ if (msg->id_len || msg->plural_len)
+ return -po_invalid_entry;
+
+ for (cnt = 0; cnt < st_max; cnt++) {
+ if (strstr(x, sysdep_str[cnt])) {
+ msg->sysdep |= sysdep[cnt];
+ }
+ }
+
+ if (p->stage == ps_parse) {
+ if (msg->ctxt == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->ctxt, p->max_ctxt_len);
+ }
+
+ msg->ctxt_len = len;
+ p->previous = po_ctxt;
+ } else if ((y = strstarts(z, "id")) && isspace(*y)) {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
+
+ if (msg->plural_len)
+ return -po_invalid_entry;
+
+ for (cnt = 0; cnt < st_max; cnt++) {
+ if (strstr(x, sysdep_str[cnt])) {
+ msg->sysdep |= sysdep[cnt];
+ }
+ }
+
+ if (p->stage == ps_parse) {
+ if (msg->id == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->id, p->max_id_len);
+ }
+
+ msg->id_len = len;
+ p->previous = po_id;
+ } else if ((y = strstarts(z, "id_plural")) && isspace(*y)) {
+ if (!msg->id_len || p->strcnt)
+ return -po_invalid_entry;
+
+ if (p->stage == ps_parse) {
+ if (msg->plural == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->plural, p->max_plural_len);
+ }
+
+ msg->plural_len = len;
+ p->previous = po_plural;
+ } else if ((y = strstarts(z, "str"))) {
+ if (!msg->id_len && !p->first)
+ return -po_invalid_entry;
+
+ if (isspace(*y)) {
+ if (p->strcnt || msg->plural_len)
+ return -po_invalid_entry;
+
+ cnt = (p->strcnt = 1) - 1;
+ } else if (*y == '[') {
+ if (!msg->plural_len)
+ return -po_invalid_entry;
+
+ if (y[2] != ']' || !isspace(y[3])) return -po_excepted_token;
+
+ p->strcnt = (cnt = y[1] - '0') + 1;
+
+ if (p->strict && p->strcnt > p->hdr.nplurals) {
+ return -po_plurals_overflow;
+ }
+ } else {
+ return -po_excepted_token;
+ }
+
+ if ((t = poparser_feed_hdr(p, x)) != po_success) {
+ return t;
+ }
+
+ if (p->stage == ps_parse) {
+ if (msg->str[cnt] == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->str[cnt], p->max_strlen[cnt]);
+ }
+
+ msg->strlen[cnt] = len;
+ p->previous = po_str;
+ } else {
+ return -po_invalid_entry;
}
-
- len = unescape(x, msg->str[cnt], p->max_strlen[cnt]);
}
-
- msg->strlen[cnt] = len;
- p->previous = po_str;
- } else {
- return -po_invalid_entry;
}
- }
- if (p->stage == ps_size) {
- if (p->max_strlen[cnt] < msg->strlen[cnt])
- p->max_strlen[cnt] = msg->strlen[cnt] + 1;
- if (p->max_plural_len < msg->plural_len)
- p->max_plural_len = msg->plural_len + 1;
- if (p->max_id_len < msg->id_len)
- p->max_id_len = msg->id_len + 1;
- if (p->max_ctxt_len < msg->ctxt_len)
- p->max_ctxt_len = msg->ctxt_len + 1;
+ if (p->stage == ps_size) {
+ if (p->max_strlen[cnt] < msg->strlen[cnt])
+ p->max_strlen[cnt] = msg->strlen[cnt] + 1;
+ if (p->max_plural_len < msg->plural_len)
+ p->max_plural_len = msg->plural_len + 1;
+ if (p->max_id_len < msg->id_len)
+ p->max_id_len = msg->id_len + 1;
+ if (p->max_ctxt_len < msg->ctxt_len)
+ p->max_ctxt_len = msg->ctxt_len + 1;
+ }
}
return po_success;
@@ -417,7 +427,7 @@ size_t poparser_sysdep(const char *in, char *out, int num) {
m = strlen(y);
if (outs)
memcpy(out, y, m);
- out += m;
+ out += m;
break;
}