diff options
Diffstat (limited to 'ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c')
-rw-r--r-- | ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c | 497 |
1 files changed, 497 insertions, 0 deletions
/*
 * Generator for expect(1) scripts that drive tests/conv_encoding.
 *
 * Reads the mapping table "CP932.TXT" from the current directory and prints,
 * on stdout, the script selected by the first command-line argument
 * (to_cp932, to_cp50220, to_cp50222 or from_cp932).
 */
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>

/* One Unicode code point together with every CP932 code that maps to it. */
struct mappings_entry {
	int cp_uni;       /* Unicode code point (sort key) */
	int n;            /* number of used slots in cp_932[] */
	int cp_932[16];   /* CP932 codes, in the order they were added */
};

/* Growable array of entries kept sorted by ascending, unique cp_uni. */
struct mappings {
	size_t n;                        /* entries in use */
	size_t nalloc;                   /* entries allocated */
	struct mappings_entry *entries;  /* heap array, NULL while empty */
};

/* Puts the table into the empty state; no memory is allocated. */
static void mappings_init(struct mappings *map)
{
	map->n = 0;
	map->nalloc = 0;
	map->entries = NULL;
}

/* Releases the entry array. The table must be re-initialised before reuse. */
static void mappings_destroy(struct mappings *map)
{
	free(map->entries); /* free(NULL) is a no-op */
	map->entries = NULL;
	map->n = 0;
	map->nalloc = 0;
}

/*
 * Makes sure there is room for at least one more entry, doubling the
 * capacity when the array is full.
 *
 * Returns 0 on success, 2 on size overflow or allocation failure (the
 * existing array is left untouched in that case).
 */
static int mappings_grow(struct mappings *map)
{
	if (map->n >= map->nalloc) {
		struct mappings_entry *new_entries;
		size_t n = map->nalloc << 1, a;

		if (n == 0)
			n = 1;
		else if (n <= map->n)
			return 2; /* the doubling wrapped around */
		a = sizeof(*map->entries) * n;
		if (a / n != sizeof(*map->entries))
			return 2; /* byte count overflowed size_t */
		new_entries = realloc(map->entries, a);
		if (!new_entries)
			return 2;
		map->entries = new_entries;
		map->nalloc = n;
	}
	return 0;
}

/*
 * Records that the CP932 code cp_932 maps to the Unicode code point cp_uni.
 * A new entry is inserted at its sorted position when cp_uni has not been
 * seen before; otherwise the code is appended to the existing entry.
 *
 * Returns 0 on success, 1 when the entry already holds the maximum number
 * of CP932 codes, 2 when the table could not be grown.
 */
static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
{
	size_t lo = 0, hi = map->n;
	struct mappings_entry *entry;

	/* Lower bound: first index whose cp_uni is not less than the key. */
	while (lo < hi) {
		const size_t mid = lo + (hi - lo) / 2;

		if (map->entries[mid].cp_uni < cp_uni)
			lo = mid + 1;
		else
			hi = mid;
	}

	if (lo == map->n || map->entries[lo].cp_uni != cp_uni) {
		const int r = mappings_grow(map);

		if (r)
			return r;
		/* Open a gap at lo; the byte count fits because it is smaller
		 * than the allocation size validated by mappings_grow(). */
		if (map->n > lo) {
			memmove(&map->entries[lo + 1], &map->entries[lo],
					sizeof(*map->entries) * (map->n - lo));
		}
		++map->n;
		map->entries[lo].cp_uni = cp_uni;
		map->entries[lo].n = 0;
	}

	entry = &map->entries[lo];
	if ((size_t)entry->n >= sizeof(entry->cp_932) / sizeof(*entry->cp_932))
		return 1;
	entry->cp_932[entry->n++] = cp_932;
	return 0;
}

/* Describes one selectable script generator. */
struct generator_entry {
	const char *name;      /* value expected as argv[1] */
	const char *prologue;  /* text printed before the per-entry output */
	const char *epilogue;  /* text printed after the per-entry output */
	void(*visitor)(const struct mappings_entry *); /* prints one entry */
};

/*
 * Encodes k as UTF-8 (including the historical 5- and 6-byte forms) into
 * buf and NUL-terminates it. buf must have room for at least 7 bytes.
 *
 * Returns the number of encoded bytes, not counting the terminator.
 */
static int utf32_utf8(char *buf, int k)
{
	int retval = 0;

	if (k < 0x80) {
		buf[0] = (char)k;
		retval = 1;
	} else if (k < 0x800) {
		buf[0] = (char)(0xc0 | (k >> 6));
		buf[1] = (char)(0x80 | (k & 0x3f));
		retval = 2;
	} else if (k < 0x10000) {
		buf[0] = (char)(0xe0 | (k >> 12));
		buf[1] = (char)(0x80 | ((k >> 6) & 0x3f));
		buf[2] = (char)(0x80 | (k & 0x3f));
		retval = 3;
	} else if (k < 0x200000) {
		buf[0] = (char)(0xf0 | (k >> 18));
		buf[1] = (char)(0x80 | ((k >> 12) & 0x3f));
		buf[2] = (char)(0x80 | ((k >> 6) & 0x3f));
		buf[3] = (char)(0x80 | (k & 0x3f));
		retval = 4;
	} else if (k < 0x4000000) {
		buf[0] = (char)(0xf8 | (k >> 24));
		buf[1] = (char)(0x80 | ((k >> 18) & 0x3f));
		buf[2] = (char)(0x80 | ((k >> 12) & 0x3f));
		buf[3] = (char)(0x80 | ((k >> 6) & 0x3f));
		buf[4] = (char)(0x80 | (k & 0x3f));
		retval = 5;
	} else {
		buf[0] = (char)(0xfc | (k >> 30));
		buf[1] = (char)(0x80 | ((k >> 24) & 0x3f));
		buf[2] = (char)(0x80 | ((k >> 18) & 0x3f));
		buf[3] = (char)(0x80 | ((k >> 12) & 0x3f));
		buf[4] = (char)(0x80 | ((k >> 6) & 0x3f));
		buf[5] = (char)(0x80 | (k & 0x3f));
		retval = 6;
	}
	buf[retval] = '\0';

	return retval;
}

/*
 * Writes the UTF-8 encoding of cp into out as a sequence of
 * "<prefix><two lowercase hex digits>" groups, one per encoded byte.
 * Output is truncated at a group boundary if out_size is too small.
 *
 * Returns the number of UTF-8 bytes that encode cp.
 */
static int format_utf8_bytes(char *out, size_t out_size, const char *prefix, int cp)
{
	char utf8[8];
	const int len = utf32_utf8(utf8, cp);
	size_t pos = 0;
	int i;

	if (out_size > 0)
		out[0] = '\0';
	for (i = 0; i < len; ++i) {
		const int w = snprintf(out + pos, out_size - pos, "%s%02x", prefix,
				(unsigned int)(unsigned char)utf8[i]);

		if (w < 0 || (size_t)w >= out_size - pos)
			break;
		pos += (size_t)w;
	}
	return len;
}

/*
 * Prints the part shared by every generated test case: the test name, the
 * "send" of the payload and the opening of the "expect" block.
 */
static void emit_test_header(int cp_uni, const char *payload)
{
	printf("set test \"U+%06X\"\n"
			"send -- \"%s\r\"\n"
			"sleep 0.001\n"
			"expect {\n", (unsigned int)cp_uni, payload);
}

/*
 * Arithmetic used by the CP50220/CP50222 generators to turn a double-byte
 * CP932 code into the two-byte value placed between the ESC $ B and
 * ESC ( B sequences. Only meaningful for c >= 0x100.
 */
static int cp932_to_jis(int c)
{
	return ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
}

static const char epilogue[] =
"close\n";

static const char prologue_to_cp932[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese CP932 UTF-8\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

static const char prologue_to_cp50220[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese CP50220 UTF-8\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

static const char prologue_to_cp50222[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese CP50222 UTF-8\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

static const char prologue_from_cp932[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese UTF-8 CP932\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

/*
 * UTF-8 -> CP932 test case: sends the code point as "\xNN" escapes and
 * accepts any of the CP932 codes recorded for it, written as "%NN" groups
 * followed by the byte count. Control characters (< 32 and 127) are skipped.
 */
static void to_cp932_visitor(const struct mappings_entry *entry)
{
	char buf_uni[32], buf_cp932[16];
	int i;

	if (entry->cp_uni < 32 || entry->cp_uni == 127)
		return;

	format_utf8_bytes(buf_uni, sizeof(buf_uni), "\\x", entry->cp_uni);
	emit_test_header(entry->cp_uni, buf_uni);

	for (i = 0; i < entry->n; ++i) {
		int len = 0;
		const int c = entry->cp_932[i];

		if (c >= 0x100) {
			len = 2;
			snprintf(buf_cp932, sizeof(buf_cp932), "%%%02x%%%02x",
					(unsigned int)((c >> 8) & 0xff), (unsigned int)(c & 0xff));
		} else {
			len = 1;
			snprintf(buf_cp932, sizeof(buf_cp932), "%%%02x", (unsigned int)c);
		}
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp932, len);
	}

	printf("}\n");
}

/*
 * CP932 -> UTF-8 test cases: one case per CP932 code of the entry. Each
 * sends the code as "\xNN" escapes and expects the UTF-8 bytes as "%NN"
 * groups followed by the byte count. Control characters are skipped.
 */
static void from_cp932_visitor(const struct mappings_entry *entry)
{
	/* "\xNN\xNN" is 8 characters plus the terminator, so 8 bytes is not
	 * enough for the double-byte case. */
	char buf_uni[32], buf_cp932[16];
	int i, len;

	if (entry->cp_uni < 32 || entry->cp_uni == 127)
		return;

	len = format_utf8_bytes(buf_uni, sizeof(buf_uni), "%", entry->cp_uni);

	for (i = 0; i < entry->n; ++i) {
		const int c = entry->cp_932[i];

		if (c >= 0x100) {
			snprintf(buf_cp932, sizeof(buf_cp932), "\\x%02x\\x%02x",
					(unsigned int)((c >> 8) & 0xff), (unsigned int)(c & 0xff));
		} else {
			snprintf(buf_cp932, sizeof(buf_cp932), "\\x%02x", (unsigned int)c);
		}
		emit_test_header(entry->cp_uni, buf_cp932);
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n"
				"}\n", buf_uni, len);
	}
}

/*
 * UTF-8 -> CP50220 test case. Single-byte codes 0xa1..0xdf are looked up in
 * a translation table and, like double-byte codes, expected as an 8-byte
 * ESC $ B <2 bytes> ESC ( B sequence; other single-byte codes are expected
 * unchanged. Control characters are skipped.
 */
static void to_cp50220_visitor(const struct mappings_entry *entry)
{
	char buf_uni[32], buf_cp50220[32];
	int i;

	if (entry->cp_uni < 32 || entry->cp_uni == 127)
		return;

	format_utf8_bytes(buf_uni, sizeof(buf_uni), "\\x", entry->cp_uni);
	emit_test_header(entry->cp_uni, buf_uni);

	for (i = 0; i < entry->n; ++i) {
		int len = 0;
		const int c = entry->cp_932[i];

		if (c >= 0xa1 && c < 0xe0) {
			/* Indexed by (c - 0xa0); slot 0 is unused. */
			static const int jisx0208_tl_map[] = {
				0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
				0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
				0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
				0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
				0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
				0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
				0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
				0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
			};
			const int j = jisx0208_tl_map[c - 0xa0];

			len = 8;
			snprintf(buf_cp50220, sizeof(buf_cp50220),
					"%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42",
					(unsigned int)(j >> 8), (unsigned int)(j & 0xff));
		} else if (c >= 0x100) {
			const int j = cp932_to_jis(c);

			len = 8;
			snprintf(buf_cp50220, sizeof(buf_cp50220),
					"%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42",
					(unsigned int)(j >> 8), (unsigned int)(j & 0xff));
		} else {
			len = 1;
			snprintf(buf_cp50220, sizeof(buf_cp50220), "%%%02x", (unsigned int)c);
		}
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
	}

	printf("}\n");
}

/*
 * UTF-8 -> CP50222 test case. Single-byte codes 0xa1..0xdf are expected as
 * the 3-byte sequence 0x0e, (c - 0x80), 0x0f; double-byte codes as an
 * 8-byte ESC $ B <2 bytes> ESC ( B sequence; other single-byte codes
 * unchanged. Control characters are skipped.
 */
static void to_cp50222_visitor(const struct mappings_entry *entry)
{
	char buf_uni[32], buf_cp50222[32];
	int i;

	if (entry->cp_uni < 32 || entry->cp_uni == 127)
		return;

	format_utf8_bytes(buf_uni, sizeof(buf_uni), "\\x", entry->cp_uni);
	emit_test_header(entry->cp_uni, buf_uni);

	for (i = 0; i < entry->n; ++i) {
		int len = 0;
		const int c = entry->cp_932[i];

		if (c >= 0xa1 && c < 0xe0) {
			len = 3;
			snprintf(buf_cp50222, sizeof(buf_cp50222), "%%0e%%%02x%%0f",
					(unsigned int)(c - 0x80));
		} else if (c >= 0x100) {
			const int j = cp932_to_jis(c);

			len = 8;
			snprintf(buf_cp50222, sizeof(buf_cp50222),
					"%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42",
					(unsigned int)(j >> 8), (unsigned int)(j & 0xff));
		} else {
			len = 1;
			snprintf(buf_cp50222, sizeof(buf_cp50222), "%%%02x", (unsigned int)c);
		}
		printf(" \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50222, len);
	}

	printf("}\n");
}

/* Available generators; the list ends with an entry whose name is NULL. */
static struct generator_entry entries[] = {
	{ "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor },
	{ "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
	{ "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
	{ "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
	{ NULL, NULL, NULL, NULL }
};

static const char cp932_txt[] = "CP932.TXT";

/* True for characters that end the significant part of a line. */
static int is_line_end(char c)
{
	return c == '\0' || c == '#' || c == '\n' || c == '\r';
}

/* True for the characters that separate fields. */
static int is_blank(char c)
{
	return c == ' ' || c == '\t';
}

/*
 * Splits line in place into blank-separated fields, stopping at the end of
 * the line or at a '#' comment. Pointers to the NUL-terminated fields are
 * stored in fields[].
 *
 * Returns the number of fields, or -1 when more than max_fields would be
 * needed.
 */
static int split_fields(char *line, const char **fields, size_t max_fields)
{
	size_t count = 0;
	char *p = line;

	for (;;) {
		char *start;
		int at_end;

		if (count >= max_fields)
			return -1;

		while (is_blank(*p))
			++p;
		if (is_line_end(*p))
			break;

		start = p;
		while (!is_line_end(*p) && !is_blank(*p))
			++p;

		at_end = is_line_end(*p);
		*p = '\0';
		fields[count++] = start;

		if (at_end)
			break;
		++p;
	}
	return (int)count;
}

/*
 * Parses s as a non-negative integer in C notation (base detected from the
 * prefix, so "0x8140" is hexadecimal). The whole string must be consumed.
 *
 * Returns 0 and stores the value in *out on success, -1 otherwise.
 */
static int parse_code(const char *s, int *out)
{
	char *end;
	long v;

	errno = 0; /* strtol() only sets errno on failure, never clears it */
	v = strtol(s, &end, 0);
	if (end == s || *end != '\0' || errno == ERANGE || v < 0 || v > INT_MAX)
		return -1;
	*out = (int)v;
	return 0;
}

/*
 * Usage: gen_exp <generator>
 *
 * Exit status: 0 on success, 255 when no generator is given, 1 for an
 * unknown generator, 2 when CP932.TXT cannot be opened or read, 3 for a
 * line with an unexpected number of fields, 4 for a malformed number or a
 * mapping that could not be stored.
 */
int main(int argc, char **argv)
{
	int retval = 0;
	FILE *fp;
	char buf[1024];
	struct generator_entry* gen;
	struct mappings map;

	if (argc <= 1) {
		fprintf(stderr, "usage: %s generator\n", argc > 0 ? argv[0] : "gen_exp");
		return 255;
	}

	for (gen = entries;; ++gen) {
		if (!gen->name) {
			fprintf(stderr, "Unknown generator: %s\n", argv[1]);
			return 1;
		}
		if (strcmp(gen->name, argv[1]) == 0)
			break;
	}

	fp = fopen(cp932_txt, "r");
	if (!fp) {
		fprintf(stderr, "Failed to open %s\n", cp932_txt);
		return 2;
	}

	mappings_init(&map);

	while (fgets(buf, sizeof(buf), fp)) {
		const char *fields[16];
		int cp_932, cp_uni, r;
		const int field = split_fields(buf, fields, sizeof(fields) / sizeof(*fields));

		if (field < 0) {
			fprintf(stderr, "Too many fields (incorrect file?)\n");
			retval = 3;
			goto out;
		}
		/* Blank/comment-only lines and codes without a Unicode mapping. */
		if (field == 0 || field == 1) {
			continue;
		} else if (field != 2) {
			fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
			retval = 3;
			goto out;
		}
		if (parse_code(fields[0], &cp_932)) {
			fprintf(stderr, "Malformed field value: %s\n", fields[0]);
			retval = 4;
			goto out;
		}
		if (parse_code(fields[1], &cp_uni)) {
			fprintf(stderr, "Malformed field value: %s\n", fields[1]);
			retval = 4;
			goto out;
		}

		r = mappings_add(&map, cp_uni, cp_932);
		if (r == 1) {
			fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", (unsigned int)cp_uni);
			retval = 4;
			goto out;
		} else if (r) {
			fprintf(stderr, "Out of memory while storing the mapping for U+%06X\n", (unsigned int)cp_uni);
			retval = 4;
			goto out;
		}
	}

	if (ferror(fp)) {
		fprintf(stderr, "Failed to read %s\n", cp932_txt);
		retval = 2;
		goto out;
	}

	{
		size_t i;
		printf("%s", gen->prologue);
		for (i = 0; i < map.n; ++i)
			gen->visitor(&map.entries[i]);
		printf("%s", gen->epilogue);
	}

out:
	fclose(fp);
	mappings_destroy(&map);
	return retval;
}

/*
 * vim: sts=4 sw=4 ts=4 noet
 */