summaryrefslogtreecommitdiff
path: root/unproto/tok_class.c
diff options
context:
space:
mode:
Diffstat (limited to 'unproto/tok_class.c')
-rw-r--r--unproto/tok_class.c432
1 files changed, 432 insertions, 0 deletions
diff --git a/unproto/tok_class.c b/unproto/tok_class.c
new file mode 100644
index 0000000..38ccd0d
--- /dev/null
+++ b/unproto/tok_class.c
@@ -0,0 +1,432 @@
+/*++
+/* NAME
+/* tok_class 3
+/* SUMMARY
+/* token classification
+/* PACKAGE
+/* unproto
+/* SYNOPSIS
+/* #include "token.h"
+/*
+/* void tok_unget(t)
+/* struct token *t;
+/*
+/* struct token *tok_class()
+/* DESCRIPTION
+/* tok_class() collects single and composite tokens, and
+/* recognizes keywords.
+/* At present, the only composite tokens are ()-delimited,
+/* comma-separated lists, and non-whitespace tokens with attached
+/* whitespace or comment tokens.
+/*
+/* Source transformations are: __DATE__ and __TIME__ are rewritten
+/* to string constants with the current date and time, respectively.
+/* Multiple string constants are concatenated. Optionally, "void *"
+/* is mapped to "char *", and plain "void" to "int".
+/*
+/* tok_unget() implements an arbitrary amount of token pushback.
+/* Only tokens obtained through tok_class() should be given to
+/* tok_unget(). This function accepts a list of tokens in
+/* last-read-first order.
+/* DIAGNOSTICS
+/* The code complains if input terminates in the middle of a list.
+/* BUGS
+/* Does not preserve white space at the beginning of a list element
+/* or after the end of a list.
+/* AUTHOR(S)
+/* Wietse Venema
+/* Eindhoven University of Technology
+/* Department of Mathematics and Computer Science
+/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
+/* LAST MODIFICATION
+/* 92/01/15 21:53:02
+/* VERSION/RELEASE
+/* 1.4
+/*--*/
+
+static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02";
+
+/* C library */
+
+#include <stdio.h>
+
+extern char *strcpy();
+extern long time();
+extern char *ctime();
+
+/* Application-specific stuff */
+
+#include "error.h"
+#include "vstring.h"
+#include "token.h"
+#include "symbol.h"
+
+static struct token *tok_list();
+static void tok_list_struct();
+static void tok_list_append();
+static void tok_strcat();
+static void tok_time();
+static void tok_date();
+static void tok_space_append();
+
+#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
+static void tok_void(); /* rewrite void keyword */
+#endif
+
+static struct token *tok_buf = 0; /* token push-back storage */
+
+/* TOK_PREPEND - add token to LIFO queue, return head */
+
+#define TOK_PREPEND(list,t) (t->next = list, list = t)
+
+/* tok_space_append - append trailing space except at start of or after list */
+
+static void tok_space_append(list, t)
+register struct token *list;
+register struct token *t;
+{
+
+ /*
+ * The head/tail fields of a token do triple duty. They are used to keep
+ * track of the members that make up a (list); to keep track of the
+ * non-blank tokens that make up one list member; and, finally, to tack
+ * whitespace and comment tokens onto the non-blank tokens that make up
+ * one list member.
+ *
+ * Within a (list), white space and comment tokens are always tacked onto
+ * the non-blank tokens to avoid parsing complications later on. For this
+ * reason, blanks and comments at the beginning of a list member are
+ * discarded because there is no token to tack them onto. (Well, we could
+ * start each list member with a dummy token, but that would mess up the
+ * whole unprototyper).
+ *
+ * Blanks or comments that follow a (list) are discarded, because the
+ * head/tail fields of a (list) are already being used for other
+ * purposes.
+ *
+ * Newlines within a (list) are discarded because they can mess up the
+ * output when we rewrite function headers. The output routines will
+ * regenerate discarded newlines, anyway.
+ */
+
+ if (list == 0 || list->tokno == TOK_LIST) {
+ tok_free(t);
+ } else {
+ tok_list_append(list, t);
+ }
+}
+
+/* tok_class - discriminate single tokens, keywords, and composite tokens */
+
+struct token *tok_class()
+{
+ register struct token *t;
+ register struct symbol *s;
+
+ /*
+ * Use push-back token, if available. Push-back tokens are already
+ * canonical and can be passed on to the caller without further
+ * inspection.
+ */
+
+ if (t = tok_buf) {
+ tok_buf = t->next;
+ t->next = 0;
+ return (t);
+ }
+ /* Read a new token and canonicalize it. */
+
+ if (t = tok_get()) {
+ switch (t->tokno) {
+ case '(': /* beginning of list */
+ t = tok_list(t);
+ break;
+ case TOK_WORD: /* look up keyword */
+ if ((s = sym_find(t->vstr->str))) {
+ switch (s->type) {
+ case TOK_TIME: /* map __TIME__ to string */
+ tok_time(t);
+ tok_strcat(t); /* look for more strings */
+ break;
+ case TOK_DATE: /* map __DATE__ to string */
+ tok_date(t);
+ tok_strcat(t); /* look for more strings */
+ break;
+#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
+ case TOK_VOID: /* optionally map void types */
+ tok_void(t);
+ break;
+#endif
+ default: /* other keyword */
+ t->tokno = s->type;
+ break;
+ }
+ }
+ break;
+ case '"': /* string, look for more */
+ tok_strcat(t);
+ break;
+ }
+ }
+ return (t);
+}
+
+/* tok_list - collect ()-delimited, comma-separated list of tokens */
+
+static struct token *tok_list(t)
+struct token *t;
+{
+ register struct token *list = tok_alloc();
+ char *filename;
+ int lineno;
+
+ /* Save context of '(' for diagnostics. */
+
+ filename = t->path;
+ lineno = t->line;
+
+ list->tokno = TOK_LIST;
+ list->head = list->tail = t;
+ list->path = t->path;
+ list->line = t->line;
+#ifdef DEBUG
+ strcpy(list->vstr->str, "LIST");
+#endif
+
+ /*
+ * Read until the matching ')' is found, accounting for structured stuff
+ * (enclosed by '{' and '}' tokens). Break the list up at each ',' token,
+ * and try to preserve as much whitespace as possible. Newlines are
+ * discarded so that they will not mess up the layout when we rewrite
+ * argument lists. The output routines will regenerate discarded
+ * newlines.
+ */
+
+ while (t = tok_class()) { /* skip blanks */
+ switch (t->tokno) {
+ case ')': /* end of list */
+ tok_list_append(list, t);
+ return (list);
+ case '{': /* struct/union type */
+ tok_list_struct(list->tail, t);
+ break;
+ case TOK_WSPACE: /* preserve trailing blanks */
+ tok_space_append(list->tail->tail, t); /* except after list */
+ break;
+ case '\n': /* fix newlines later */
+ tok_free(t);
+ break;
+ case ',': /* list separator */
+ tok_list_append(list, t);
+ break;
+ default: /* other */
+ tok_list_append(list->tail, t);
+ break;
+ }
+ }
+ error_where(filename, lineno, "unmatched '('");
+ return (list); /* do not waste any data */
+}
+
+/* tok_list_struct - collect structured type info within list */
+
+static void tok_list_struct(list, t)
+register struct token *list;
+register struct token *t;
+{
+ char *filename;
+ int lineno;
+
+ /*
+ * Save context of '{' for diagnostics. This routine is called by the one
+ * that collects list members. If the '}' is not found, the list
+ * collector will not see the closing ')' either.
+ */
+
+ filename = t->path;
+ lineno = t->line;
+
+ tok_list_append(list, t);
+
+ /*
+ * Collect tokens until the matching '}' is found. Try to preserve as
+ * much whitespace as possible. Newlines are discarded so that they do
+ * not interfere when rewriting argument lists. The output routines will
+ * regenerate discarded newlines.
+ */
+
+ while (t = tok_class()) {
+ switch (t->tokno) {
+ case TOK_WSPACE: /* preserve trailing blanks */
+ tok_space_append(list->tail, t); /* except after list */
+ break;
+ case '\n': /* fix newlines later */
+ tok_free(t);
+ break;
+ case '{': /* recurse */
+ tok_list_struct(list, t);
+ break;
+ case '}': /* done */
+ tok_list_append(list, t);
+ return;
+ default: /* other */
+ tok_list_append(list, t);
+ break;
+ }
+ }
+ error_where(filename, lineno, "unmatched '{'");
+}
+
+/* tok_strcat - concatenate multiple string constants */
+
+static void tok_strcat(t1)
+register struct token *t1;
+{
+ register struct token *t2;
+ register struct token *lookahead = 0;
+
+ /*
+ * Read ahead past whitespace, comments and newlines. If we find a string
+ * token, concatenate it with the previous one and push back the
+ * intervening tokens (thus preserving as much information as possible).
+ * If we find something else, push back all lookahead tokens.
+ */
+
+#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
+
+ while (t2 = tok_class()) {
+ switch (t2->tokno) {
+ case TOK_WSPACE: /* read past comments/blanks */
+ case '\n': /* read past newlines */
+ TOK_PREPEND(lookahead, t2);
+ break;
+ case '"': /* concatenate string tokens */
+ if (vs_strcpy(t1->vstr,
+ t1->vstr->str + strlen(t1->vstr->str) - 1,
+ t2->vstr->str + 1) == 0)
+ fatal("out of memory");
+ tok_free(t2);
+ PUSHBACK_AND_RETURN;
+ default: /* something else, push back */
+ tok_unget(t2);
+ PUSHBACK_AND_RETURN;
+ }
+ }
+ PUSHBACK_AND_RETURN; /* hit EOF */
+}
+
+#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
+
+/* tok_void - support for compilers that have problems with "void" */
+
+static void tok_void(t)
+register struct token *t;
+{
+ register struct token *t2;
+ register struct token *lookahead = 0;
+
+ /*
+ * Look ahead beyond whitespace, comments and newlines until we see a '*'
+ * token. If one is found, replace "void" by "char". If we find something
+ * else, and if "void" should always be mapped, replace "void" by "int".
+ * Always push back the lookahead tokens.
+ *
+ * XXX The code also replaces the (void) argument list; this must be
+ * accounted for later on. The alternative would be to add (in unproto.c)
+ * TOK_VOID cases all over the place and that would be too error-prone.
+ */
+
+#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
+
+ while (t2 = tok_class()) {
+ switch (TOK_PREPEND(lookahead, t2)->tokno) {
+ case TOK_WSPACE: /* read past comments/blanks */
+ case '\n': /* read past newline */
+ break;
+ case '*': /* "void *" -> "char *" */
+ if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0)
+ fatal("out of memory");
+ PUSHBACK_AND_RETURN;
+ default:
+#ifdef MAP_VOID /* plain "void" -> "int" */
+ if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0)
+ fatal("out of memory");
+#endif
+ PUSHBACK_AND_RETURN;
+ }
+ }
+ PUSHBACK_AND_RETURN; /* hit EOF */
+}
+
+#endif
+
+/* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */
+
+static void tok_time(t)
+struct token *t;
+{
+ long now;
+ char *cp;
+ char buf[BUFSIZ];
+
+ /*
+ * Using sprintf() to select parts of a string is gross, but this should
+ * be fast enough.
+ */
+
+ (void) time(&now);
+ cp = ctime(&now);
+ sprintf(buf, "\"%.8s\"", cp + 11);
+ if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
+ fatal("out of memory");
+ t->tokno = buf[0];
+}
+
+/* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */
+
+static void tok_date(t)
+struct token *t;
+{
+ long now;
+ char *cp;
+ char buf[BUFSIZ];
+
+ /*
+ * Using sprintf() to select parts of a string is gross, but this should
+ * be fast enough.
+ */
+
+ (void) time(&now);
+ cp = ctime(&now);
+ sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20);
+ if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
+ fatal("out of memory");
+ t->tokno = buf[0];
+}
+
+/* tok_unget - push back one or more possibly composite tokens */
+
+void tok_unget(t)
+register struct token *t;
+{
+ register struct token *next;
+
+ do {
+ next = t->next;
+ TOK_PREPEND(tok_buf, t);
+ } while (t = next);
+}
+
+/* tok_list_append - append data to list */
+
+static void tok_list_append(h, t)
+struct token *h;
+struct token *t;
+{
+ if (h->head == 0) {
+ h->head = h->tail = t;
+ } else {
+ h->tail->next = t;
+ h->tail = t;
+ }
+}