diff options
Diffstat (limited to 'unproto/tok_class.c')
-rw-r--r-- | unproto/tok_class.c | 432 |
1 files changed, 432 insertions, 0 deletions
diff --git a/unproto/tok_class.c b/unproto/tok_class.c new file mode 100644 index 0000000..38ccd0d --- /dev/null +++ b/unproto/tok_class.c @@ -0,0 +1,432 @@ +/*++ +/* NAME +/* tok_class 3 +/* SUMMARY +/* token classification +/* PACKAGE +/* unproto +/* SYNOPSIS +/* #include "token.h" +/* +/* void tok_unget(t) +/* struct token *t; +/* +/* struct token *tok_class() +/* DESCRIPTION +/* tok_class() collects single and composite tokens, and +/* recognizes keywords. +/* At present, the only composite tokens are ()-delimited, +/* comma-separated lists, and non-whitespace tokens with attached +/* whitespace or comment tokens. +/* +/* Source transformations are: __DATE__ and __TIME__ are rewritten +/* to string constants with the current date and time, respectively. +/* Multiple string constants are concatenated. Optionally, "void *" +/* is mapped to "char *", and plain "void" to "int". +/* +/* tok_unget() implements an arbitrary amount of token pushback. +/* Only tokens obtained through tok_class() should be given to +/* tok_unget(). This function accepts a list of tokens in +/* last-read-first order. +/* DIAGNOSTICS +/* The code complains if input terminates in the middle of a list. +/* BUGS +/* Does not preserve white space at the beginning of a list element +/* or after the end of a list. +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/01/15 21:53:02 +/* VERSION/RELEASE +/* 1.4 +/*--*/ + +static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02"; + +/* C library */ + +#include <stdio.h> + +extern char *strcpy(); +extern long time(); +extern char *ctime(); + +/* Application-specific stuff */ + +#include "error.h" +#include "vstring.h" +#include "token.h" +#include "symbol.h" + +static struct token *tok_list(); +static void tok_list_struct(); +static void tok_list_append(); +static void tok_strcat(); +static void tok_time(); +static void tok_date(); +static void tok_space_append(); + +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +static void tok_void(); /* rewrite void keyword */ +#endif + +static struct token *tok_buf = 0; /* token push-back storage */ + +/* TOK_PREPEND - add token to LIFO queue, return head */ + +#define TOK_PREPEND(list,t) (t->next = list, list = t) + +/* tok_space_append - append trailing space except at start of or after list */ + +static void tok_space_append(list, t) +register struct token *list; +register struct token *t; +{ + + /* + * The head/tail fields of a token do triple duty. They are used to keep + * track of the members that make up a (list); to keep track of the + * non-blank tokens that make up one list member; and, finally, to tack + * whitespace and comment tokens onto the non-blank tokens that make up + * one list member. + * + * Within a (list), white space and comment tokens are always tacked onto + * the non-blank tokens to avoid parsing complications later on. For this + * reason, blanks and comments at the beginning of a list member are + * discarded because there is no token to tack them onto. (Well, we could + * start each list member with a dummy token, but that would mess up the + * whole unprototyper). + * + * Blanks or comments that follow a (list) are discarded, because the + * head/tail fields of a (list) are already being used for other + * purposes. + * + * Newlines within a (list) are discarded because they can mess up the + * output when we rewrite function headers. The output routines will + * regenerate discarded newlines, anyway. + */ + + if (list == 0 || list->tokno == TOK_LIST) { + tok_free(t); + } else { + tok_list_append(list, t); + } +} + +/* tok_class - discriminate single tokens, keywords, and composite tokens */ + +struct token *tok_class() +{ + register struct token *t; + register struct symbol *s; + + /* + * Use push-back token, if available. Push-back tokens are already + * canonical and can be passed on to the caller without further + * inspection. + */ + + if (t = tok_buf) { + tok_buf = t->next; + t->next = 0; + return (t); + } + /* Read a new token and canonicalize it. */ + + if (t = tok_get()) { + switch (t->tokno) { + case '(': /* beginning of list */ + t = tok_list(t); + break; + case TOK_WORD: /* look up keyword */ + if ((s = sym_find(t->vstr->str))) { + switch (s->type) { + case TOK_TIME: /* map __TIME__ to string */ + tok_time(t); + tok_strcat(t); /* look for more strings */ + break; + case TOK_DATE: /* map __DATE__ to string */ + tok_date(t); + tok_strcat(t); /* look for more strings */ + break; +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + case TOK_VOID: /* optionally map void types */ + tok_void(t); + break; +#endif + default: /* other keyword */ + t->tokno = s->type; + break; + } + } + break; + case '"': /* string, look for more */ + tok_strcat(t); + break; + } + } + return (t); +} + +/* tok_list - collect ()-delimited, comma-separated list of tokens */ + +static struct token *tok_list(t) +struct token *t; +{ + register struct token *list = tok_alloc(); + char *filename; + int lineno; + + /* Save context of '(' for diagnostics. */ + + filename = t->path; + lineno = t->line; + + list->tokno = TOK_LIST; + list->head = list->tail = t; + list->path = t->path; + list->line = t->line; +#ifdef DEBUG + strcpy(list->vstr->str, "LIST"); +#endif + + /* + * Read until the matching ')' is found, accounting for structured stuff + * (enclosed by '{' and '}' tokens). Break the list up at each ',' token, + * and try to preserve as much whitespace as possible. Newlines are + * discarded so that they will not mess up the layout when we rewrite + * argument lists. The output routines will regenerate discarded + * newlines. + */ + + while (t = tok_class()) { /* skip blanks */ + switch (t->tokno) { + case ')': /* end of list */ + tok_list_append(list, t); + return (list); + case '{': /* struct/union type */ + tok_list_struct(list->tail, t); + break; + case TOK_WSPACE: /* preserve trailing blanks */ + tok_space_append(list->tail->tail, t); /* except after list */ + break; + case '\n': /* fix newlines later */ + tok_free(t); + break; + case ',': /* list separator */ + tok_list_append(list, t); + break; + default: /* other */ + tok_list_append(list->tail, t); + break; + } + } + error_where(filename, lineno, "unmatched '('"); + return (list); /* do not waste any data */ +} + +/* tok_list_struct - collect structured type info within list */ + +static void tok_list_struct(list, t) +register struct token *list; +register struct token *t; +{ + char *filename; + int lineno; + + /* + * Save context of '{' for diagnostics. This routine is called by the one + * that collects list members. If the '}' is not found, the list + * collector will not see the closing ')' either. + */ + + filename = t->path; + lineno = t->line; + + tok_list_append(list, t); + + /* + * Collect tokens until the matching '}' is found. Try to preserve as + * much whitespace as possible. Newlines are discarded so that they do + * not interfere when rewriting argument lists. The output routines will + * regenerate discarded newlines. + */ + + while (t = tok_class()) { + switch (t->tokno) { + case TOK_WSPACE: /* preserve trailing blanks */ + tok_space_append(list->tail, t); /* except after list */ + break; + case '\n': /* fix newlines later */ + tok_free(t); + break; + case '{': /* recurse */ + tok_list_struct(list, t); + break; + case '}': /* done */ + tok_list_append(list, t); + return; + default: /* other */ + tok_list_append(list, t); + break; + } + } + error_where(filename, lineno, "unmatched '{'"); +} + +/* tok_strcat - concatenate multiple string constants */ + +static void tok_strcat(t1) +register struct token *t1; +{ + register struct token *t2; + register struct token *lookahead = 0; + + /* + * Read ahead past whitespace, comments and newlines. If we find a string + * token, concatenate it with the previous one and push back the + * intervening tokens (thus preserving as much information as possible). + * If we find something else, push back all lookahead tokens. + */ + +#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } + + while (t2 = tok_class()) { + switch (t2->tokno) { + case TOK_WSPACE: /* read past comments/blanks */ + case '\n': /* read past newlines */ + TOK_PREPEND(lookahead, t2); + break; + case '"': /* concatenate string tokens */ + if (vs_strcpy(t1->vstr, + t1->vstr->str + strlen(t1->vstr->str) - 1, + t2->vstr->str + 1) == 0) + fatal("out of memory"); + tok_free(t2); + PUSHBACK_AND_RETURN; + default: /* something else, push back */ + tok_unget(t2); + PUSHBACK_AND_RETURN; + } + } + PUSHBACK_AND_RETURN; /* hit EOF */ +} + +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + +/* tok_void - support for compilers that have problems with "void" */ + +static void tok_void(t) +register struct token *t; +{ + register struct token *t2; + register struct token *lookahead = 0; + + /* + * Look ahead beyond whitespace, comments and newlines until we see a '*' + * token. If one is found, replace "void" by "char". If we find something + * else, and if "void" should always be mapped, replace "void" by "int". + * Always push back the lookahead tokens. + * + * XXX The code also replaces the (void) argument list; this must be + * accounted for later on. The alternative would be to add (in unproto.c) + * TOK_VOID cases all over the place and that would be too error-prone. + */ + +#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } + + while (t2 = tok_class()) { + switch (TOK_PREPEND(lookahead, t2)->tokno) { + case TOK_WSPACE: /* read past comments/blanks */ + case '\n': /* read past newline */ + break; + case '*': /* "void *" -> "char *" */ + if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0) + fatal("out of memory"); + PUSHBACK_AND_RETURN; + default: +#ifdef MAP_VOID /* plain "void" -> "int" */ + if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0) + fatal("out of memory"); +#endif + PUSHBACK_AND_RETURN; + } + } + PUSHBACK_AND_RETURN; /* hit EOF */ +} + +#endif + +/* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */ + +static void tok_time(t) +struct token *t; +{ + long now; + char *cp; + char buf[BUFSIZ]; + + /* + * Using sprintf() to select parts of a string is gross, but this should + * be fast enough. + */ + + (void) time(&now); + cp = ctime(&now); + sprintf(buf, "\"%.8s\"", cp + 11); + if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) + fatal("out of memory"); + t->tokno = buf[0]; +} + +/* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */ + +static void tok_date(t) +struct token *t; +{ + long now; + char *cp; + char buf[BUFSIZ]; + + /* + * Using sprintf() to select parts of a string is gross, but this should + * be fast enough. + */ + + (void) time(&now); + cp = ctime(&now); + sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20); + if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) + fatal("out of memory"); + t->tokno = buf[0]; +} + +/* tok_unget - push back one or more possibly composite tokens */ + +void tok_unget(t) +register struct token *t; +{ + register struct token *next; + + do { + next = t->next; + TOK_PREPEND(tok_buf, t); + } while (t = next); +} + +/* tok_list_append - append data to list */ + +static void tok_list_append(h, t) +struct token *h; +struct token *t; +{ + if (h->head == 0) { + h->head = h->tail = t; + } else { + h->tail->next = t; + h->tail = t; + } +} |