summary refs log tree commit diff
diff options
context:
space:
mode:
author xhe <xw897002528@gmail.com> 2017-04-02 12:31:31 +0800
committer xhe <xw897002528@gmail.com> 2017-05-01 12:27:38 +0800
commit 916f59f2a3361bbc1fc5a1ba1c1f76006de490fd (patch)
tree e1bdb7cd277ad58010d2f6c731eed93eadccd01a
parent 09c11ed0f65885bd491cee2041a939b26f28bdd1 (diff)
download gettext-tiny-916f59f2a3361bbc1fc5a1ba1c1f76006de490fd.tar.gz
support converting to UTF-8
-rw-r--r-- src/poparser.c 37
-rw-r--r-- src/poparser.h 1
2 files changed, 32 insertions, 6 deletions
diff --git a/src/poparser.c b/src/poparser.c
index 2494342..ecb623a 100644
--- a/src/poparser.c
+++ b/src/poparser.c
@@ -2,13 +2,14 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
+#include <iconv.h>
#include "poparser.h"
#include "StringEscape.h"
#define streq(A, B) (!strcmp(A, B))
#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1)))
-static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart) {
+static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) {
enum po_entry result_type;
char *x, *y;
size_t start = (size_t) lp;
@@ -31,6 +32,17 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart
conv:
*stringstart = ((size_t) x - start) + 1;
} else if(*lp == '"') {
+ if(!(*info->charset)) {
+ if(x = strstr(lp, "charset=")) {
+ // charset=xxx\\n
+ int len = strlen(x+=8) - 4;
+ assert(len <= 11);
+ if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) {
+ memcpy(info->charset, x, len);
+ info->charset[len] = 0;
+ }
+ }
+ }
result_type = pe_str;
x = lp;
goto conv;
@@ -42,7 +54,7 @@ static enum po_entry get_type_and_start(char* lp, char* end, size_t *stringstart
/* expects a pointer to the first char after a opening " in a string,
* converts the string into convbuf, and returns the length of that string */
-static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t convbuflen) {
+static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) {
size_t result = 0;
char* e = x + strlen(x);
assert(e > x && e < end && *e == 0);
@@ -50,7 +62,19 @@ static size_t get_length_and_convert(char* x, char* end, char* convbuf, size_t c
while(isspace(*e)) e--;
if(*e != '"') abort();
*e = 0;
- result = unescape(x, convbuf, convbuflen);
+ char *s;
+ if(*info->charset) {
+ iconv_t ret = iconv_open("UTF-8", info->charset);
+ if(ret != (iconv_t)-1) {
+ size_t a=end-x, b=a*4;
+ char mid[b], *midp=mid;
+ iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b);
+ if(s = strstr(mid, "charset="))
+ memcpy(s+8, "UTF-8\\n\0", 8);
+ result = unescape(mid, convbuf, convbuflen);
+ // iconv doesnt recognize the encoding
+ } else result = unescape(x, convbuf, convbuflen);
+ } else result = unescape(x, convbuf, convbuflen);
return result;
}
@@ -62,6 +86,7 @@ void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_
p->prev_type = pe_invalid;
p->curr_len = 0;
p->cbdata = cbdata;
+ *(p->info.charset) = 0;
}
enum lineactions {
@@ -108,11 +133,11 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
enum po_entry type;
- type = get_type_and_start(line, line + buflen, &strstart);
+ type = get_type_and_start(&p->info, line, line + buflen, &strstart);
switch(action_tbl[p->prev_type][type]) {
case la_incr:
assert(type == pe_msgid || type == pe_msgstr || type == pe_str);
- p->curr_len += get_length_and_convert(line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
+ p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
break;
case la_proc:
assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr);
@@ -121,7 +146,7 @@ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
p->info.type = p->prev_type;
p->cb(&p->info, p->cbdata);
if(type != pe_invalid)
- p->curr_len = get_length_and_convert(line + strstart, line + buflen, convbuf, convbuflen);
+ p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen);
else
p->curr_len = 0;
break;
diff --git a/src/poparser.h b/src/poparser.h
index e29594f..25e3af5 100644
--- a/src/poparser.h
+++ b/src/poparser.h
@@ -14,6 +14,7 @@ enum po_entry {
struct po_info {
enum po_entry type;
char *text;
+ char charset[12];
size_t textlen;
};