summary | refs | log | tree | commit | diff
path: root/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c')
-rw-r--r-- ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c | 497
1 files changed, 497 insertions, 0 deletions
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c b/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c
new file mode 100644
index 0000000..93a0fb3
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c
@@ -0,0 +1,497 @@
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/*
+ * One Unicode code point together with every CP932 code that was added
+ * for it through mappings_add().
+ */
+struct mappings_entry {
+ int cp_uni; /* Unicode code point; mappings_add() keeps the table ordered by this */
+ int n; /* number of slots of cp_932[] currently in use */
+ int cp_932[16]; /* CP932 codes recorded for cp_uni, in the order they were added */
+};
+
+/*
+ * Growable array of mappings_entry. mappings_grow() enlarges the storage
+ * and mappings_add() inserts entries in ascending cp_uni order.
+ */
+struct mappings {
+ size_t n; /* number of entries in use */
+ size_t nalloc; /* number of entries the storage can hold */
+ struct mappings_entry *entries; /* heap storage; null until the first growth */
+};
+
+/*
+ * Puts `map` into the empty state: no storage, zero capacity, zero entries.
+ * Gives every field a defined value so the other mappings_*() functions
+ * can be used on it.
+ */
+static void mappings_init(struct mappings *map)
+{
+    map->entries = NULL;
+    map->nalloc = 0;
+    map->n = 0;
+}
+
+/*
+ * Releases the storage owned by `map` and returns it to the empty state.
+ *
+ * free() accepts a null pointer, so no guard is needed. The fields are
+ * reset afterwards so that calling this twice, or reusing the table after
+ * destruction, never touches the released memory.
+ */
+static void mappings_destroy(struct mappings *map)
+{
+    free(map->entries);
+    map->entries = NULL;
+    map->nalloc = 0;
+    map->n = 0;
+}
+
+/*
+ * Makes sure `map` has room for at least one more entry.
+ *
+ * Does nothing while spare capacity remains. Otherwise the capacity is
+ * doubled (an empty table starts at one entry) and the storage is
+ * reallocated.
+ *
+ * Returns 0 on success, or 2 if the new capacity or byte size overflows
+ * size_t or realloc() fails. On failure `map` is left unmodified.
+ */
+static int mappings_grow(struct mappings *map)
+{
+    struct mappings_entry *grown;
+    size_t capacity, bytes;
+
+    if (map->n < map->nalloc)
+        return 0;
+
+    capacity = map->nalloc << 1;
+    if (capacity == 0)
+        capacity = 1;
+    else if (capacity <= map->n)
+        return 2; /* doubling wrapped around */
+
+    bytes = capacity * sizeof(*map->entries);
+    if (bytes / capacity != sizeof(*map->entries))
+        return 2; /* byte count wrapped around */
+
+    grown = realloc(map->entries, bytes);
+    if (grown == NULL)
+        return 2;
+
+    map->nalloc = capacity;
+    map->entries = grown;
+    return 0;
+}
+
+/*
+ * Records that CP932 code `cp_932` maps to Unicode code point `cp_uni`.
+ *
+ * The table is kept in ascending cp_uni order with one entry per code
+ * point. A binary search locates the entry; if none exists a fresh one is
+ * inserted at its sorted position. The code is then appended to that
+ * entry's cp_932[] list.
+ *
+ * Returns 0 on success, 1 if the entry's cp_932[] list is already full,
+ * or 2 if the table could not be enlarged.
+ */
+static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
+{
+    size_t lo = 0, hi = map->n;
+    struct mappings_entry *slot;
+
+    /* Lower bound: first index whose cp_uni is not below the requested one. */
+    while (lo < hi) {
+        const size_t mid = lo + (hi - lo) / 2;
+
+        if (map->entries[mid].cp_uni < cp_uni)
+            lo = mid + 1;
+        else
+            hi = mid;
+    }
+
+    if (lo < map->n && map->entries[lo].cp_uni == cp_uni) {
+        slot = &map->entries[lo];
+    } else {
+        const int r = mappings_grow(map);
+
+        if (r)
+            return r;
+
+        /* Open a gap at `lo` by shifting the tail up one position. */
+        if (lo < map->n) {
+            const size_t tail = map->n - lo;
+            const size_t bytes = tail * sizeof(*map->entries);
+
+            if (bytes / tail != sizeof(*map->entries))
+                return 2;
+            memmove(&map->entries[lo + 1], &map->entries[lo], bytes);
+        }
+        ++map->n;
+
+        /* Take the address only now: growing may have moved the storage. */
+        slot = &map->entries[lo];
+        slot->cp_uni = cp_uni;
+        slot->n = 0;
+    }
+
+    if (slot->n >= sizeof(slot->cp_932) / sizeof(*slot->cp_932))
+        return 1;
+    slot->cp_932[slot->n++] = cp_932;
+    return 0;
+}
+
+/*
+ * Describes one selectable generator: the name matched against the
+ * command-line argument, the text printed before and after the generated
+ * body, and the callback invoked once per mapping table entry.
+ */
+struct generator_entry {
+ const char *name; /* compared with argv[1] in main(); null marks the end of the table */
+ const char *prologue; /* printed once before any entry is visited */
+ const char *epilogue; /* printed once after the last entry */
+ void(*visitor)(const struct mappings_entry *); /* prints the output for a single entry */
+};
+
+/*
+ * Encodes code point `k` into `buf` as a NUL-terminated UTF-8 style byte
+ * sequence and returns the number of bytes written before the terminator.
+ *
+ * Values below 0x80 are stored as a single byte. Larger values use 2 to 6
+ * bytes: a lead byte carrying the length marker and the topmost bits,
+ * followed by continuation bytes of six bits each. `buf` must have room
+ * for 7 bytes.
+ */
+static int utf32_utf8(char *buf, int k)
+{
+    int len, lead, i;
+
+    if (k < 0x80) {
+        buf[0] = k;
+        buf[1] = '\0';
+        return 1;
+    }
+
+    if (k < 0x800) {
+        len = 2;
+        lead = 0xc0;
+    } else if (k < 0x10000) {
+        len = 3;
+        lead = 0xe0;
+    } else if (k < 0x200000) {
+        len = 4;
+        lead = 0xf0;
+    } else if (k < 0x4000000) {
+        len = 5;
+        lead = 0xf8;
+    } else {
+        len = 6;
+        lead = 0xfc;
+    }
+
+    /* Fill the continuation bytes back to front, six bits at a time. */
+    for (i = len - 1; i > 0; --i) {
+        buf[i] = 0x80 | (k & 0x3f);
+        k >>= 6;
+    }
+    /* Whatever is left of k belongs in the lead byte. */
+    buf[0] = lead | k;
+    buf[len] = '\0';
+
+    return len;
+}
+
+/* Text every generator prints after its last test case: the expect `close` command. */
+static const char epilogue[] = "close\n";
+
+/*
+ * Header shared by all generated expect scripts. The four prologues differ
+ * only in the two encoding names handed to tests/conv_encoding, so the
+ * common text is spelled once here.
+ * NOTE(review): the argument order looks like target encoding first,
+ * source encoding second -- confirm against tests/conv_encoding.
+ */
+#define GEN_EXP_PROLOGUE(to_enc, from_enc) \
+"#!/usr/bin/expect -f\n" \
+"spawn tests/conv_encoding Japanese " to_enc " " from_enc "\n" \
+"set timeout 1\n" \
+"\n" \
+"expect_after {\n" \
+" \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" \
+"}\n"
+
+static const char prologue_to_cp932[] = GEN_EXP_PROLOGUE("CP932", "UTF-8");
+
+static const char prologue_to_cp50220[] = GEN_EXP_PROLOGUE("CP50220", "UTF-8");
+
+static const char prologue_to_cp50222[] = GEN_EXP_PROLOGUE("CP50222", "UTF-8");
+
+static const char prologue_from_cp932[] = GEN_EXP_PROLOGUE("UTF-8", "CP932");
+
+#undef GEN_EXP_PROLOGUE
+
+/*
+ * Prints one expect test case for the "to_cp932" generator.
+ *
+ * The case sends the entry's code point as UTF-8 bytes written as \xNN
+ * escapes, then accepts any of the entry's CP932 codes, each printed as
+ * %NN-escaped bytes followed by its byte count in parentheses.
+ *
+ * Code points below 32 and code point 127 produce no output.
+ */
+static void to_cp932_visitor(const struct mappings_entry *entry)
+{
+    static const char hex[] = "0123456789abcdef";
+    char raw[8], send_arg[32], expected[8];
+    int nbytes, i;
+
+    if (entry->cp_uni < 32 || entry->cp_uni == 127)
+        return;
+
+    /* Spell every UTF-8 byte as a four-character \xNN escape. */
+    nbytes = utf32_utf8(raw, entry->cp_uni);
+    for (i = 0; i < nbytes; ++i) {
+        const unsigned char byte = (unsigned char)raw[i];
+
+        send_arg[i * 4] = '\\';
+        send_arg[i * 4 + 1] = 'x';
+        send_arg[i * 4 + 2] = hex[byte >> 4];
+        send_arg[i * 4 + 3] = hex[byte & 15];
+    }
+    send_arg[nbytes * 4] = '\0';
+
+    printf("set test \"U+%06X\"\n"
+           "send -- \"%s\r\"\n"
+           "sleep 0.001\n"
+           "expect {\n", entry->cp_uni, send_arg);
+
+    /* One accepted answer per CP932 code recorded for this code point. */
+    for (i = 0; i < entry->n; ++i) {
+        const int code = entry->cp_932[i];
+        const int len = code >= 0x100 ? 2 : 1;
+
+        if (len == 2)
+            sprintf(expected, "%%%02x%%%02x", (code >> 8) & 0xff, code & 0xff);
+        else
+            sprintf(expected, "%%%02x", code);
+        printf(" \"%s (%d)\\r\\n\" { pass $test }\n", expected, len);
+    }
+
+    fputs("}\n", stdout);
+}
+
+/*
+ * Prints the expect test cases for the "from_cp932" generator.
+ *
+ * One case is printed per CP932 code of the entry. Each case sends the
+ * CP932 bytes written as \xNN escapes and expects the entry's code point
+ * as %NN-escaped UTF-8 bytes followed by the UTF-8 byte count in
+ * parentheses.
+ *
+ * Code points below 32 and code point 127 produce no output.
+ */
+static void from_cp932_visitor(const struct mappings_entry *entry)
+{
+    static const char hex[] = "0123456789abcdef";
+    /*
+     * A two-byte code is written as "\xNN\xNN": 8 characters plus the
+     * terminator. The former 8-byte buffer was one byte too small for it,
+     * so the buffer is enlarged and every write is bounded.
+     */
+    char raw[8], expected[32], send_arg[16];
+    int i, len;
+
+    if (entry->cp_uni < 32 || entry->cp_uni == 127)
+        return;
+
+    /* Spell every UTF-8 byte as a three-character %NN escape. */
+    len = utf32_utf8(raw, entry->cp_uni);
+    for (i = 0; i < len; ++i) {
+        const unsigned char byte = (unsigned char)raw[i];
+
+        expected[i * 3] = '%';
+        expected[i * 3 + 1] = hex[byte >> 4];
+        expected[i * 3 + 2] = hex[byte & 15];
+    }
+    expected[len * 3] = '\0';
+
+    for (i = 0; i < entry->n; ++i) {
+        const int c = entry->cp_932[i];
+
+        if (c >= 0x100)
+            snprintf(send_arg, sizeof(send_arg), "\\x%02x\\x%02x", (c >> 8) & 0xff, c & 0xff);
+        else
+            snprintf(send_arg, sizeof(send_arg), "\\x%02x", c);
+        printf("set test \"U+%06X\"\n"
+               "send -- \"%s\r\"\n"
+               "sleep 0.001\n"
+               "expect {\n"
+               " \"%s (%d)\\r\\n\" { pass $test }\n"
+               "}\n",
+               entry->cp_uni, send_arg, expected, len);
+    }
+}
+
+/*
+ * Prints one expect test case for the "to_cp50220" generator.
+ *
+ * The case sends the entry's code point as UTF-8 bytes written as \xNN
+ * escapes, then accepts one answer per CP932 code of the entry:
+ *   - codes 0xa1..0xdf are replaced through a lookup table and written as
+ *     a two-byte code between the byte sequences 1b 24 42 (ESC $ B) and
+ *     1b 28 42 (ESC ( B);
+ *   - codes of 0x100 and above are converted arithmetically and written
+ *     between the same two sequences;
+ *   - every other code is written as a single byte.
+ * All bytes are printed as %NN escapes followed by the byte count.
+ *
+ * Code points below 32 and code point 127 produce no output.
+ */
+static void to_cp50220_visitor(const struct mappings_entry *entry)
+{
+    /*
+     * Replacement two-byte codes for the single-byte range, indexed by
+     * (code - 0xa0). NOTE(review): presumably the full-width JIS X 0208
+     * equivalents of CP932 half-width katakana -- confirm.
+     */
+    static const int single_byte_to_jis[] = {
+        0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
+        0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
+        0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
+        0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
+        0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
+        0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
+        0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
+        0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
+    };
+    static const char hex[] = "0123456789abcdef";
+    char raw[8], send_arg[32], expected[32];
+    int nbytes, i;
+
+    if (entry->cp_uni < 32 || entry->cp_uni == 127)
+        return;
+
+    /* Spell every UTF-8 byte as a four-character \xNN escape. */
+    nbytes = utf32_utf8(raw, entry->cp_uni);
+    for (i = 0; i < nbytes; ++i) {
+        const unsigned char byte = (unsigned char)raw[i];
+
+        send_arg[i * 4] = '\\';
+        send_arg[i * 4 + 1] = 'x';
+        send_arg[i * 4 + 2] = hex[byte >> 4];
+        send_arg[i * 4 + 3] = hex[byte & 15];
+    }
+    send_arg[nbytes * 4] = '\0';
+
+    printf("set test \"U+%06X\"\n"
+           "send -- \"%s\r\"\n"
+           "sleep 0.001\n"
+           "expect {\n", entry->cp_uni, send_arg);
+
+    for (i = 0; i < entry->n; ++i) {
+        const int code = entry->cp_932[i];
+        int escaped = 1, jis = 0, len;
+
+        if (code >= 0xa1 && code < 0xe0) {
+            jis = single_byte_to_jis[code - 0xa0];
+        } else if (code >= 0x100) {
+            /* Arithmetic conversion of the two-byte code, split into its steps. */
+            const int low = code & 0xff;
+            const int row = (code & 0xff00) - (code >= 0xe000 ? 0xb000 : 0x7000);
+            const int cell = low - ((code & 0x80) ? 32 : 31);
+
+            jis = ((row << 1) | cell) - (low >= 159 ? 94 : 0x100);
+        } else {
+            escaped = 0;
+        }
+
+        if (escaped) {
+            /* 3 bytes in, 2 bytes of code, 3 bytes out. */
+            len = 8;
+            sprintf(expected, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", jis >> 8, jis & 0xff);
+        } else {
+            len = 1;
+            sprintf(expected, "%%%02x", code);
+        }
+        printf(" \"%s (%d)\\r\\n\" { pass $test }\n", expected, len);
+    }
+
+    fputs("}\n", stdout);
+}
+
+/*
+ * Prints one expect test case for the "to_cp50222" generator.
+ *
+ * The case sends the entry's code point as UTF-8 bytes written as \xNN
+ * escapes, then accepts one answer per CP932 code of the entry:
+ *   - codes 0xa1..0xdf are written as (code - 0x80) between the bytes
+ *     0e and 0f;
+ *   - codes of 0x100 and above are converted arithmetically and written
+ *     between the byte sequences 1b 24 42 (ESC $ B) and 1b 28 42 (ESC ( B);
+ *   - every other code is written as a single byte.
+ * All bytes are printed as %NN escapes followed by the byte count.
+ *
+ * Code points below 32 and code point 127 produce no output.
+ */
+static void to_cp50222_visitor(const struct mappings_entry *entry)
+{
+    static const char hex[] = "0123456789abcdef";
+    char raw[8], send_arg[32], expected[32];
+    int nbytes, i;
+
+    if (entry->cp_uni < 32 || entry->cp_uni == 127)
+        return;
+
+    /* Spell every UTF-8 byte as a four-character \xNN escape. */
+    nbytes = utf32_utf8(raw, entry->cp_uni);
+    for (i = 0; i < nbytes; ++i) {
+        const unsigned char byte = (unsigned char)raw[i];
+
+        send_arg[i * 4] = '\\';
+        send_arg[i * 4 + 1] = 'x';
+        send_arg[i * 4 + 2] = hex[byte >> 4];
+        send_arg[i * 4 + 3] = hex[byte & 15];
+    }
+    send_arg[nbytes * 4] = '\0';
+
+    printf("set test \"U+%06X\"\n"
+           "send -- \"%s\r\"\n"
+           "sleep 0.001\n"
+           "expect {\n", entry->cp_uni, send_arg);
+
+    for (i = 0; i < entry->n; ++i) {
+        const int code = entry->cp_932[i];
+        int len;
+
+        if (code >= 0x100) {
+            /* Arithmetic conversion of the two-byte code, split into its steps. */
+            const int low = code & 0xff;
+            const int row = (code & 0xff00) - (code >= 0xe000 ? 0xb000 : 0x7000);
+            const int cell = low - ((code & 0x80) ? 32 : 31);
+            const int jis = ((row << 1) | cell) - (low >= 159 ? 94 : 0x100);
+
+            len = 8;
+            sprintf(expected, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", jis >> 8, jis & 0xff);
+        } else if (code >= 0xa1 && code < 0xe0) {
+            len = 3;
+            sprintf(expected, "%%0e%%%02x%%0f", code - 0x80);
+        } else {
+            len = 1;
+            sprintf(expected, "%%%02x", code);
+        }
+        printf(" \"%s (%d)\\r\\n\" { pass $test }\n", expected, len);
+    }
+
+    fputs("}\n", stdout);
+}
+
+
+/*
+ * Generators selectable by name on the command line. main() scans this
+ * table in order and stops at the entry whose name is null.
+ */
+static struct generator_entry entries[] = {
+ { "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor },
+ { "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
+ { "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
+ { "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
+ { NULL } /* end-of-table marker */
+};
+
+/* Mapping file read by main(); opened relative to the current working directory. */
+static const char cp932_txt[] = "CP932.TXT";
+
+/*
+ * Splits one line of the mapping file, in place, into fields separated by
+ * spaces or tabs. A '#' starts a comment that runs to the end of the line.
+ * A carriage return is treated as a blank so that files with CRLF line
+ * endings parse the same way as files with LF line endings.
+ *
+ * Pointers to the NUL-terminated fields are stored in `fields`.
+ * Returns the number of fields found, or -1 if the line holds more than
+ * `max_fields` of them.
+ */
+static int split_fields(char *line, const char **fields, int max_fields)
+{
+    int count = 0;
+    char *p = line;
+
+    for (;;) {
+        while (*p == ' ' || *p == '\t' || *p == '\r')
+            ++p;
+        if (*p == '\0' || *p == '#' || *p == '\n')
+            return count;
+        if (count >= max_fields)
+            return -1;
+
+        fields[count++] = p;
+        while (*p != '\0' && *p != '#' && *p != '\n' &&
+               *p != ' ' && *p != '\t' && *p != '\r')
+            ++p;
+
+        if (*p == ' ' || *p == '\t' || *p == '\r') {
+            /* More text may follow: terminate the field and keep scanning. */
+            *p++ = '\0';
+        } else {
+            /* End of line or start of a comment: this was the last field. */
+            *p = '\0';
+            return count;
+        }
+    }
+}
+
+/*
+ * Parses `text` as an integer in C notation (decimal, 0x hexadecimal or
+ * leading-0 octal) that must lie within 0..max.
+ *
+ * Stores the value in `*value` and returns 0 on success. Returns -1 if the
+ * text is empty, has trailing characters, or is out of range.
+ */
+static int parse_code(const char *text, long max, int *value)
+{
+    char *end;
+    long v;
+
+    /* strtol() only ever sets errno, so clear any stale value first. */
+    errno = 0;
+    v = strtol(text, &end, 0);
+    if (end == text || *end != '\0' || errno == ERANGE || v < 0 || v > max)
+        return -1;
+
+    *value = (int)v;
+    return 0;
+}
+
+/*
+ * Usage: gen_exp <generator>
+ *
+ * Reads the CP932 <-> Unicode mapping file, collects the CP932 codes of
+ * every Unicode code point, and prints the expect script produced by the
+ * generator named on the command line to standard output.
+ *
+ * Exit status:
+ *   0    success
+ *   1    unknown generator name
+ *   2    the mapping file could not be opened or read
+ *   3    a line has an unexpected number of fields
+ *   4    a field is not a valid code, or a mapping could not be stored
+ *   255  no generator name given
+ */
+int main(int argc, char **argv)
+{
+    int retval = 0;
+    FILE *fp;
+    char buf[1024];
+    struct generator_entry *gen;
+    struct mappings map;
+    size_t i;
+
+    if (argc <= 1) {
+        /* argv[0] is allowed to be null when argc is 0. */
+        fprintf(stderr, "usage: %s generator\n", argc > 0 ? argv[0] : "gen_exp");
+        return 255;
+    }
+
+    for (gen = entries; gen->name; ++gen) {
+        if (strcmp(gen->name, argv[1]) == 0)
+            break;
+    }
+    if (!gen->name) {
+        fprintf(stderr, "Unknown generator: %s\n", argv[1]);
+        return 1;
+    }
+
+    fp = fopen(cp932_txt, "r");
+    if (!fp) {
+        fprintf(stderr, "Failed to open %s\n", cp932_txt);
+        return 2;
+    }
+
+    mappings_init(&map);
+
+    while (fgets(buf, sizeof(buf), fp)) {
+        const char *fields[16];
+        const int field = split_fields(buf, fields, (int)(sizeof(fields) / sizeof(*fields)));
+        int cp_932, cp_uni, r;
+
+        if (field < 0) {
+            fprintf(stderr, "Too many fields (incorrect file?)\n");
+            retval = 3;
+            goto out;
+        }
+        /* Blank lines, comments, and codes without a Unicode mapping. */
+        if (field == 0 || field == 1)
+            continue;
+        if (field != 2) {
+            fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
+            retval = 3;
+            goto out;
+        }
+
+        /*
+         * The visitors print a CP932 code as at most two bytes, and size
+         * their buffers for that, so anything outside 0..0xffff is rejected.
+         */
+        if (parse_code(fields[0], 0xffff, &cp_932)) {
+            fprintf(stderr, "Malformed field value: %s\n", fields[0]);
+            retval = 4;
+            goto out;
+        }
+        /* 0x10ffff is the highest Unicode code point. */
+        if (parse_code(fields[1], 0x10ffff, &cp_uni)) {
+            fprintf(stderr, "Malformed field value: %s\n", fields[1]);
+            retval = 4;
+            goto out;
+        }
+
+        r = mappings_add(&map, cp_uni, cp_932);
+        if (r) {
+            /* mappings_add() returns 1 for a full entry, 2 for a failed growth. */
+            if (r == 1)
+                fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", cp_uni);
+            else
+                fprintf(stderr, "Out of memory while adding the mapping for U+%06X\n", cp_uni);
+            retval = 4;
+            goto out;
+        }
+    }
+
+    /* fgets() returns null on error as well as at end of file. */
+    if (ferror(fp)) {
+        fprintf(stderr, "Failed to read %s\n", cp932_txt);
+        retval = 2;
+        goto out;
+    }
+
+    printf("%s", gen->prologue);
+    for (i = 0; i < map.n; ++i)
+        gen->visitor(&map.entries[i]);
+    printf("%s", gen->epilogue);
+
+out:
+    mappings_destroy(&map);
+    fclose(fp);
+    return retval;
+}
+
+/*
+ * vim: sts=4 sw=4 ts=4 noet
+ */