summaryrefslogtreecommitdiff
path: root/strpool.c
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2008-07-03 22:46:19 +0200
committerLennart Poettering <lennart@poettering.net>2008-07-03 22:46:19 +0200
commit659d98ee3391f09108de188eb58ea76f74e60f7f (patch)
tree5065ccfd3c7dc7566a8dc810715165a87ce2beb6 /strpool.c
parent0e07e7f1be6cee31243fa21fc2b3f0c018c58aff (diff)
downloadlibatasmart-659d98ee3391f09108de188eb58ea76f74e60f7f.tar.gz
add a simple string pool generator
Diffstat (limited to 'strpool.c')
-rw-r--r--strpool.c598
1 files changed, 598 insertions, 0 deletions
diff --git a/strpool.c b/strpool.c
new file mode 100644
index 0000000..385ab1a
--- /dev/null
+++ b/strpool.c
@@ -0,0 +1,598 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+
typedef struct item item;

/*
 * One string literal lifted out of the input, together with the verbatim
 * source text that preceded it. Items are chained in input order via `next`.
 */
struct item {
        char *cnt;              /* source text seen before this literal (heap, may be NULL) */
        size_t cntl;            /* number of bytes in cnt */
        char *text;             /* unescaped bytes of the literal (heap, not NUL-terminated) */
        size_t size;            /* number of bytes in text */
        unsigned idx;           /* byte offset of this string inside the generated pool */
        struct item *suffix_of; /* item whose text ends with this one's text, or NULL */
        struct item *next;      /* following item in the list, or NULL */
};
+
+static void free_items(struct item *first) {
+
+ while (first) {
+ struct item *n = first->next;
+
+ free(first->cnt);
+ free(first->text);
+ free(first);
+
+ first = n;
+ }
+
+}
+
+static void find_suffixes(struct item *first) {
+ struct item *i, *j;
+
+ for (i = first; i; i = i->next) {
+ int right = 0;
+
+ for (j = first; j; j = j->next) {
+
+ if (i == j) {
+ right = 1;
+ continue;
+ }
+
+ if (i->size > j->size)
+ continue;
+
+ if (i->size == j->size && !right)
+ continue;
+
+ if (memcmp(i->text, j->text+j->size-i->size, i->size) != 0)
+ continue;
+
+ i->suffix_of = j;
+ break;
+ }
+ }
+}
+
+static void fill_idx(struct item *first) {
+ struct item *i;
+ unsigned k = 0;
+
+ for (i = first; i; i = i->next) {
+ if (i->suffix_of)
+ continue;
+
+ i->idx = k;
+ k += i->size+1;
+ }
+
+ for (i = first; i; i = i->next) {
+ struct item *p;
+
+ if (!i->suffix_of)
+ continue;
+
+ for (p = i->suffix_of; p->suffix_of; p = p->suffix_of)
+ ;
+
+ assert(i->size <= p->size);
+ assert(memcmp(i->text, p->text + p->size - i->size, i->size) == 0);
+
+ i->idx = p->idx + p->size - i->size;
+ }
+}
+
+static void dump_string(FILE *out, struct item *i) {
+ const char *t;
+
+ fputs("\n\t\"", out);
+
+ for (t = i->text; t < i->text+i->size; t++) {
+ switch (*t) {
+ case '\\':
+ fputs("\\\\", out);
+ break;
+ case '\"':
+ fputs("\\\"", out);
+ break;
+ case '\'':
+ fputs("\\'", out);
+ break;
+ case '\n':
+ fputs("\\n\"\n\t\"", out);
+ break;
+ case '\r':
+ fputs("\\r", out);
+ break;
+ case '\b':
+ fputs("\\b", out);
+ break;
+ case '\t':
+ fputs("\\t", out);
+ break;
+ case '\f':
+ fputs("\\f", out);
+ break;
+ case '\a':
+ fputs("\\f", out);
+ break;
+ case '\v':
+ fputs("\\v", out);
+ break;
+ default:
+ if (*t >= 32 && *t < 127)
+ putc(*t, out);
+ else
+ fprintf(out, "\\x%02x", *t);
+ break;
+ }
+ }
+
+ fputs("\\0\"", out);
+}
+
+static void dump_text(FILE *out, struct item *first) {
+ struct item *i;
+
+ for (i = first; i; i = i->next) {
+
+ if (i->cnt)
+ fwrite(i->cnt, 1, i->cntl, out);
+
+ fprintf(out, "((const char*) %u)", i->idx);
+ }
+}
+
+static void dump_pool(FILE *out, struct item *first) {
+ struct item *i;
+ int saved_rel=-1, saved_bytes=0, saved_strings=0;
+
+ for (i = first; i; i = i->next) {
+ saved_rel++;
+
+ if (i->suffix_of) {
+ saved_strings ++;
+ saved_bytes += i->size;
+ }
+ }
+
+ fprintf(out, "/* Saved %i relocations, saved %i strings (%i b) due to suffix compression. */\n", saved_rel, saved_strings, saved_bytes);
+
+
+ fputs("const static char _strpool_[] =", out);
+
+ for (i = first; i; i = i->next) {
+
+ if (i->suffix_of)
+ fputs("\n\t/*** Suppressed due to suffix: ", out);
+
+ dump_string(out, i);
+
+ if (i->suffix_of)
+ fputs(" ***/", out);
+
+ }
+
+ fputs(";\n", out);
+}
+
/*
 * Grow the heap buffer `r` (currently *rl bytes, may be NULL) by `n` bytes
 * copied from *c, then advance both the length *rl and the read cursor *c
 * by `n`. Returns the (possibly moved) buffer; aborts the process when
 * memory cannot be obtained.
 */
static char *append(char *r, size_t *rl, char **c, size_t n) {
        size_t old_len = *rl;
        char *grown = realloc(r, old_len + n);

        if (grown == NULL)
                abort();

        memcpy(grown + old_len, *c, n);

        *rl = old_len + n;
        *c += n;

        return grown;
}
+
/* Return the value (0..15) of hexadecimal digit `c`, or -1 if it is not one. */
static int parse_hex_digit(char c) {

        if (c >= '0' && c <= '9')
                return c - '0';

        if (c >= 'a' && c <= 'f')
                return c - 'a' + 0xA;

        if (c >= 'A' && c <= 'F')
                return c - 'A' + 0xA;

        return -1;
}

/*
 * Decode the digits of a hexadecimal escape sequence starting at `t`
 * (the text right after "\x"). At most two digits are consumed.
 *
 * On success the decoded byte is stored in *r and the number of digits
 * consumed (1 or 2) is returned. Returns -1, leaving *r untouched, when
 * `t` does not start with a hexadecimal digit.
 */
static int parse_hex(const char *t, char *r) {
        int value, digit;

        if ((value = parse_hex_digit(t[0])) < 0)
                return -1;

        /* t[0] was a digit and therefore not the terminator, so t[1] may be
         * read; a NUL there is simply "not a digit". */
        if ((digit = parse_hex_digit(t[1])) < 0) {
                /* A single digit is the whole value: "\x5" is 0x05, not 0x50. */
                *r = (char) value;
                return 1;
        }

        *r = (char) ((value << 4) | digit);
        return 2;
}
+
/* Return the value (0..7) of octal digit `c`, or -1 if it is not one. */
static int parse_oct_digit(char c) {

        if (c >= '0' && c <= '7')
                return c - '0';

        return -1;
}

/*
 * Decode an octal escape sequence whose first digit is at `t` (the text
 * right after the backslash). Up to three digits are consumed.
 *
 * On success the decoded byte is stored in *r and the number of digits
 * consumed (1..3) is returned. Returns -1, leaving *r untouched, when `t`
 * does not start with an octal digit or the value does not fit in a byte.
 */
static int parse_oct(const char *t, char *r) {
        int value = 0;
        int k;

        /* Accumulate digit by digit so short sequences keep their real value
         * ("\1" is 1, "\12" is 10). The loop stops at the first non-digit,
         * which includes the terminating NUL, so nothing past it is read. */
        for (k = 0; k < 3; k++) {
                int digit = parse_oct_digit(t[k]);

                if (digit < 0)
                        break;

                value = (value << 3) | digit;
        }

        if (k == 0)
                return -1;

        if (value > 0xFF)
                return -1;

        *r = (char) value;

        return k;
}
+
+static int parse(FILE *in, struct item **rfirst, char **remain, size_t *remain_size) {
+
+ int enabled = 0;
+ enum {
+ STATE_TEXT,
+ STATE_COMMENT_C,
+ STATE_COMMENT_CPP,
+ STATE_STRING,
+ STATE_CHAR,
+ } state = STATE_TEXT;
+
+ char *r = NULL;
+ size_t rl = 0;
+ char *cnt = NULL;
+ size_t cntl = 0;
+ struct item *first = NULL, *last = NULL;
+ *rfirst = NULL;
+
+ for (;;) {
+ char t[1024], *c;
+ int done = 0;
+
+ if (!(fgets(t, sizeof(t), in))) {
+
+ if (feof(in))
+ break;
+
+ fprintf(stderr, "Failed to read: %s\n", strerror(errno));
+ goto fail;
+ }
+
+ c = t;
+
+ do {
+
+/* fprintf(stderr, "enabled %i, state %i, cnt %i, remaining string is: %s", enabled, state, !!cnt, c); */
+
+ switch (state) {
+
+ case STATE_TEXT:
+
+ if (!strncmp(c, "/*", 2)) {
+ state = STATE_COMMENT_C;
+ r = append(r, &rl, &c, 2);
+ } else if (!strncmp(c, "//", 2)) {
+ state = STATE_COMMENT_CPP;
+ r = append(r, &rl, &c, 2);
+ } else if (*c == '"') {
+ state = STATE_STRING;
+
+ if (enabled) {
+ cnt = r;
+ cntl = rl;
+
+ r = NULL;
+ rl = 0;
+
+ c ++;
+ } else
+ r = append(r, &rl, &c, 1);
+ } else if (*c == '\'') {
+ state = STATE_CHAR;
+ r = append(r, &rl, &c, 1);
+ } else if (*c == 0)
+ done = 1;
+ else
+ r = append(r, &rl, &c, 1);
+
+ break;
+
+ case STATE_COMMENT_C:
+
+ if (!strncmp(c, "*/", 2)) {
+ state = STATE_TEXT;
+ r = append(r, &rl, &c, 2);
+ } else if (!strncmp(c, "%STRINGPOOLSTART%", 17)) {
+ enabled = 1;
+ r = append(r, &rl, &c, 17);
+ } else if (!strncmp(c, "%STRINGPOOLSTOP%", 16)) {
+ enabled = 0;
+ r = append(r, &rl, &c, 16);
+ } else if (*c == 0)
+ done = 1;
+ else
+ r = append(r, &rl, &c, 1);
+
+ break;
+
+ case STATE_COMMENT_CPP:
+
+ if (*c == '\n' || *c == '\r') {
+ state = STATE_TEXT;
+ r = append(r, &rl, &c, 1);
+ } else if (!strncmp(c, "%STRINGPOOLSTART%", 17)) {
+ enabled = 1;
+ r = append(r, &rl, &c, 17);
+ } else if (!strncmp(c, "%STRINGPOOLSTOP%", 16)) {
+ enabled = 0;
+ r = append(r, &rl, &c, 16);
+ } else if (*c == 0) {
+ state = STATE_TEXT;
+ done = 1;
+ } else
+ r = append(r, &rl, &c, 1);
+
+ break;
+
+ case STATE_STRING:
+ case STATE_CHAR:
+
+ if ((*c == '\'' && state == STATE_CHAR) || (*c == '"' && state == STATE_STRING)) {
+
+ if (state == STATE_STRING && enabled) {
+ struct item *i;
+ i = malloc(sizeof(struct item));
+
+ if (!i)
+ abort();
+
+ i->cnt = cnt;
+ i->cntl = cntl;
+
+ cnt = NULL;
+ cntl = 0;
+
+ i->text = r;
+ i->size = rl;
+
+ r = NULL;
+ rl = 0;
+
+ i->next = NULL;
+
+ if (last)
+ last->next = i;
+ else
+ first = i;
+
+ last = i;
+
+ c++;
+
+ } else
+ r = append(r, &rl, &c, 1);
+
+ state = STATE_TEXT;
+
+ } else if (*c == '\\') {
+
+ char d;
+ char l = 2;
+
+ switch (c[1]) {
+
+ case '\\':
+ case '"':
+ case '\'':
+ case '?':
+ d = c[1];
+ break;
+ case 'n':
+ d = '\n';
+ break;
+ case 'r':
+ d = '\r';
+ break;
+ case 'b':
+ d = '\b';
+ break;
+ case 't':
+ d = '\t';
+ break;
+ case 'f':
+ d = '\f';
+ break;
+ case 'a':
+ d = '\a';
+ break;
+ case 'v':
+ d = '\v';
+ break;
+ case 'x': {
+ int k;
+ if ((k = parse_hex(c+2, &d)) < 0) {
+ fprintf(stderr, "Parse failure: invalid hexadecimal escape sequence.\n");
+ goto fail;
+ }
+ l = 2 + k;
+ break;
+ }
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ int k;
+ if ((k = parse_oct(c+1, &d)) < 0) {
+ fprintf(stderr, "Parse failure: invalid octal escape sequence.\n");
+ goto fail;
+ }
+ l = 1 + k;
+ break;
+ }
+ default:
+ fprintf(stderr, "Parse failure: invalid escape sequence.\n");
+ goto fail;
+ }
+
+ if (state == STATE_STRING && enabled) {
+ char *x = &d;
+ r = append(r, &rl, &x, 1);
+ c += l;
+ } else
+ r = append(r, &rl, &c, l);
+ } else if (*c == 0) {
+ fprintf(stderr, "Parse failure: multiline strings suck.\n");
+ goto fail;
+ } else
+ r = append(r, &rl, &c, 1);
+
+ break;
+ }
+ } while (!done);
+ }
+
+ if (enabled) {
+ fprintf(stderr, "Parse failure: missing %%STRINGPOOLSTOP%%\n");
+ goto fail;
+ }
+
+ if (state != STATE_TEXT) {
+ fprintf(stderr, "Parse failure: unexpected EOF.\n");
+ goto fail;
+ }
+
+ assert(!cnt);
+
+ *rfirst = first;
+
+ *remain = r;
+ *remain_size = rl;
+
+ return 0;
+
+fail:
+
+ free(cnt);
+ free(r);
+ free_items(first);
+
+ return -1;
+}
+
/*
 * Read C source from `in`, and write it to `out` with the collected string
 * literals replaced by offsets into a generated `_strpool_` array.
 *
 * When no literal was collected the input text is copied through
 * unchanged. Otherwise the pool definition and the STRPOOL / _P helper
 * macros are written first, followed by the rewritten text.
 *
 * Returns 0 on success. Returns -1 when parsing fails (parse() has already
 * reported the reason) or when writing to `out` fails.
 */
static int process(FILE *in, FILE *out) {

        struct item *first = NULL;
        char *remain = NULL;
        size_t remain_size = 0;
        int ret = 0;

        if (parse(in, &first, &remain, &remain_size) < 0)
                return -1;

        if (first) {
                find_suffixes(first);
                fill_idx(first);

                dump_pool(out, first);

                fputs("#ifndef STRPOOL\n"
                      "#define STRPOOL\n"
                      "#endif\n"
                      "#ifndef _P\n"
                      "#define _P(x) (_strpool_ + (unsigned) (x))\n"
                      "#endif\n\n",
                      out);

                dump_text(out, first);
        }

        /* `remain` is NULL when nothing followed the last literal. */
        if (remain_size > 0)
                fwrite(remain, 1, remain_size, out);

        /* A truncated output file must not be reported as success. */
        if (ferror(out) || fflush(out) != 0) {
                fprintf(stderr, "Failed to write output: %s\n", strerror(errno));
                ret = -1;
        }

        free_items(first);
        free(remain);

        return ret;
}
+
/*
 * Usage: strpool [INPUT [OUTPUT]]
 *
 * Reads from INPUT (default: stdin) and writes the processed source to
 * OUTPUT (default: stdout). Exits with 0 on success and 1 on any failure.
 */
int main(int argc, char *argv[]) {
        int ret = 1;
        FILE *in = stdin, *out = stdout;

        if (argc > 1) {
                if (!(in = fopen(argv[1], "r"))) {
                        fprintf(stderr, "Failed to open '%s': %s\n", argv[1], strerror(errno));
                        return 1;
                }
        }

        if (argc > 2) {
                FILE *f = fopen(argv[2], "w");

                if (!f) {
                        fprintf(stderr, "Failed to open '%s': %s\n", argv[2], strerror(errno));
                        /* `out` is still stdout, so only `in` is closed below. */
                        goto finish;
                }

                out = f;
        }

        if (process(in, out) < 0)
                goto finish;

        ret = 0;

finish:

        if (in != stdin)
                fclose(in);

        /* Closing flushes buffered data, so a failure here means lost output. */
        if (out != stdout && fclose(out) != 0) {
                fprintf(stderr, "Failed to close '%s': %s\n", argv[2], strerror(errno));
                ret = 1;
        }

        return ret;
}