diff options
Diffstat (limited to 'unproto/tok_class.c')
-rw-r--r-- | unproto/tok_class.c | 334 |
1 files changed, 293 insertions, 41 deletions
diff --git a/unproto/tok_class.c b/unproto/tok_class.c index e25b695..38ccd0d 100644 --- a/unproto/tok_class.c +++ b/unproto/tok_class.c @@ -8,16 +8,26 @@ /* SYNOPSIS /* #include "token.h" /* -/* struct token *tok_class(skip) -/* int skip; +/* void tok_unget(t) +/* struct token *t; +/* +/* struct token *tok_class() /* DESCRIPTION -/* tok_class() collects a single and composite tokens, and +/* tok_class() collects single and composite tokens, and /* recognizes keywords. /* At present, the only composite tokens are ()-delimited, -/* comma-separated lists. +/* comma-separated lists, and non-whitespace tokens with attached +/* whitespace or comment tokens. +/* +/* Source transformations are: __DATE__ and __TIME__ are rewritten +/* to string constants with the current date and time, respectively. +/* Multiple string constants are concatenated. Optionally, "void *" +/* is mapped to "char *", and plain "void" to "int". /* -/* The skip argument has the same meaning as with the tok_get() -/* function. +/* tok_unget() implements an arbitrary amount of token pushback. +/* Only tokens obtained through tok_class() should be given to +/* tok_unget(). This function accepts a list of tokens in +/* last-read-first order. /* DIAGNOSTICS /* The code complains if input terminates in the middle of a list. /* BUGS @@ -29,18 +39,20 @@ /* Department of Mathematics and Computer Science /* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands /* LAST MODIFICATION -/* 91/11/30 21:10:28 +/* 92/01/15 21:53:02 /* VERSION/RELEASE -/* 1.3 +/* 1.4 /*--*/ -static char class_sccsid[] = "@(#) tok_class.c 1.3 91/11/30 21:10:28"; +static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02"; /* C library */ #include <stdio.h> extern char *strcpy(); +extern long time(); +extern char *ctime(); /* NOTE(review): hand-written K&R declarations; ISO C declares time() and ctime() in <time.h> in terms of time_t, so long is only right where time_t is long */ /* Application-specific stuff */ @@ -52,33 +64,107 @@ extern char *strcpy(); static struct token *tok_list(); static void tok_list_struct(); static void tok_list_append(); +static void tok_strcat(); +static void tok_time(); +static void tok_date(); +static void tok_space_append(); + +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +static void tok_void(); /* rewrite void keyword */ +#endif + +static struct token *tok_buf = 0; /* token push-back storage */ + +/* TOK_PREPEND - add token to LIFO queue, return head */ -/* tok_space_append - append trailing space except after list */ +#define TOK_PREPEND(list,t) (t->next = list, list = t) /* NOTE(review): arguments are unparenthesized and each is expanded twice; every use in this diff passes plain identifiers */ + +/* tok_space_append - append trailing space except at start of or after list */ + +static void tok_space_append(list, t) +register struct token *list; +register struct token *t; +{ -#define tok_space_append(list,t) { \ - if (list == 0 /* leading space*/ \ - || list->tokno == TOK_LIST) \ - tok_free(t); \ - else \ - tok_list_append(list, t); \ - } + /* + * The head/tail fields of a token do triple duty. They are used to keep + * track of the members that make up a (list); to keep track of the + * non-blank tokens that make up one list member; and, finally, to tack + * whitespace and comment tokens onto the non-blank tokens that make up + * one list member. + * + * Within a (list), white space and comment tokens are always tacked onto + * the non-blank tokens to avoid parsing complications later on. For this + * reason, blanks and comments at the beginning of a list member are + * discarded because there is no token to tack them onto. 
(Well, we could + * start each list member with a dummy token, but that would mess up the + * whole unprototyper). + * + * Blanks or comments that follow a (list) are discarded, because the + * head/tail fields of a (list) are already being used for other + * purposes. + * + * Newlines within a (list) are discarded because they can mess up the + * output when we rewrite function headers. The output routines will + * regenerate discarded newlines, anyway. + */ + + if (list == 0 || list->tokno == TOK_LIST) { /* nothing to attach to, or the previous item is a list */ + tok_free(t); + } else { + tok_list_append(list, t); + } +} /* tok_class - discriminate single tokens, keywords, and composite tokens */ -struct token *tok_class(skip) -int skip; +struct token *tok_class() { register struct token *t; register struct symbol *s; - if (t = tok_get(skip)) { + /* + * Use push-back token, if available. Push-back tokens are already + * canonical and can be passed on to the caller without further + * inspection. + */ + + if (t = tok_buf) { + tok_buf = t->next; + t->next = 0; /* unlink from the push-back chain before returning it */ + return (t); + } + /* Read a new token and canonicalize it. 
*/ + + if (t = tok_get()) { switch (t->tokno) { case '(': /* beginning of list */ t = tok_list(t); break; case TOK_WORD: /* look up keyword */ - if (s = sym_find(t->vstr->str)) - t->tokno = s->type; + if ((s = sym_find(t->vstr->str))) { + switch (s->type) { + case TOK_TIME: /* map __TIME__ to string */ + tok_time(t); + tok_strcat(t); /* look for more strings */ + break; + case TOK_DATE: /* map __DATE__ to string */ + tok_date(t); + tok_strcat(t); /* look for more strings */ + break; +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + case TOK_VOID: /* optionally map void types */ + tok_void(t); + break; +#endif + default: /* other keyword */ + t->tokno = s->type; + break; + } + } + break; + case '"': /* string, look for more */ + tok_strcat(t); break; } } @@ -91,28 +177,35 @@ static struct token *tok_list(t) struct token *t; { register struct token *list = tok_alloc(); - char filename[BUFSIZ]; + char *filename; int lineno; /* Save context of '(' for diagnostics. */ - strcpy(filename, curr_path); - lineno = curr_line; + filename = t->path; + lineno = t->line; /* filename now aliases t->path instead of holding a copy as the removed strcpy() did */ list->tokno = TOK_LIST; list->head = list->tail = t; + list->path = t->path; + list->line = t->line; #ifdef DEBUG strcpy(list->vstr->str, "LIST"); #endif - for (;;) { - if ((t = tok_get(DO_WSPACE)) == 0) { /* skip blanks */ - error_where(0, filename, lineno, "unmatched '('"); - return (list); /* do not waste any data */ - } + /* + * Read until the matching ')' is found, accounting for structured stuff + * (enclosed by '{' and '}' tokens). Break the list up at each ',' token, + * and try to preserve as much whitespace as possible. Newlines are + * discarded so that they will not mess up the layout when we rewrite + * argument lists. The output routines will regenerate discarded + * newlines. 
+ */ + + while (t = tok_class()) { /* skip blanks */ switch (t->tokno) { case ')': /* end of list */ - tok_free(t); + tok_list_append(list, t); /* the closing token is now appended to the list instead of being freed */ return (list); case '{': /* struct/union type */ tok_list_struct(list->tail, t); @@ -120,20 +213,19 @@ struct token *t; case TOK_WSPACE: /* preserve trailing blanks */ tok_space_append(list->tail->tail, t); /* except after list */ break; - case '\n': /* preserve line count */ - tok_flush(t); + case '\n': /* fix newlines later */ + tok_free(t); break; case ',': /* list separator */ tok_list_append(list, t); break; - case '(': /* beginning of list */ - tok_list_append(list->tail, tok_list(t)); - break; - default: /* ordinary token */ + default: /* other */ /* this now also covers nested lists, which tok_class() returns already collected */ tok_list_append(list->tail, t); break; } } + error_where(filename, lineno, "unmatched '('"); /* reached only when tok_class() returns 0 before a closing token was seen */ + return (list); /* do not waste any data */ } /* tok_list_struct - collect structured type info within list */ @@ -142,16 +234,35 @@ static void tok_list_struct(list, t) register struct token *list; register struct token *t; { + char *filename; + int lineno; + + /* + * Save context of '{' for diagnostics. This routine is called by the one + * that collects list members. If the '}' is not found, the list + * collector will not see the closing ')' either. + */ + + filename = t->path; + lineno = t->line; + tok_list_append(list, t); - while (t = tok_class(DO_WSPACE)) { + /* + * Collect tokens until the matching '}' is found. Try to preserve as + * much whitespace as possible. Newlines are discarded so that they do + * not interfere when rewriting argument lists. The output routines will + * regenerate discarded newlines. 
+ */ + + while (t = tok_class()) { switch (t->tokno) { - case '\n': /* preserve line count */ - tok_flush(t); - break; case TOK_WSPACE: /* preserve trailing blanks */ tok_space_append(list->tail, t); /* except after list */ break; + case '\n': /* fix newlines later */ + tok_free(t); + break; case '{': /* recurse */ tok_list_struct(list, t); break; @@ -163,6 +274,147 @@ register struct token *t; break; } } + error_where(filename, lineno, "unmatched '{'"); +} + +/* tok_strcat - concatenate multiple string constants */ + +static void tok_strcat(t1) +register struct token *t1; +{ + register struct token *t2; + register struct token *lookahead = 0; + + /* + * Read ahead past whitespace, comments and newlines. If we find a string + * token, concatenate it with the previous one and push back the + * intervening tokens (thus preserving as much information as possible). + * If we find something else, push back all lookahead tokens. + */ + +#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } + + while (t2 = tok_class()) { /* tok_class() itself calls tok_strcat() on each string token it returns, so longer runs of strings are folded through that recursion */ + switch (t2->tokno) { + case TOK_WSPACE: /* read past comments/blanks */ + case '\n': /* read past newlines */ + TOK_PREPEND(lookahead, t2); + break; + case '"': /* concatenate string tokens */ /* copy starts at the last character of t1 and skips the first character of t2; NOTE(review): strlen() has no declaration in the visible part of this file, confirm one is in scope */ + if (vs_strcpy(t1->vstr, + t1->vstr->str + strlen(t1->vstr->str) - 1, + t2->vstr->str + 1) == 0) + fatal("out of memory"); + tok_free(t2); + PUSHBACK_AND_RETURN; + default: /* something else, push back */ + tok_unget(t2); /* pushed back before the lookahead list, so the lookahead tokens end up in front of it */ + PUSHBACK_AND_RETURN; + } + } + PUSHBACK_AND_RETURN; /* hit EOF */ +} + +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + +/* tok_void - support for compilers that have problems with "void" */ + +static void tok_void(t) +register struct token *t; +{ + register struct token *t2; + register struct token *lookahead = 0; + + /* + * Look ahead beyond whitespace, comments and newlines until we see a '*' + * token. If one is found, replace "void" by "char". 
If we find something + * else, and if "void" should always be mapped, replace "void" by "int". + * Always push back the lookahead tokens. + * + * XXX The code also replaces the (void) argument list; this must be + * accounted for later on. The alternative would be to add (in unproto.c) + * TOK_VOID cases all over the place and that would be too error-prone. + */ + +#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } /* same replacement text as the definition in tok_strcat() */ + + while (t2 = tok_class()) { + switch (TOK_PREPEND(lookahead, t2)->tokno) { /* t2 is queued on lookahead before it is examined, so the deciding token is pushed back too */ + case TOK_WSPACE: /* read past comments/blanks */ + case '\n': /* read past newline */ + break; + case '*': /* "void *" -> "char *" */ + if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0) + fatal("out of memory"); + PUSHBACK_AND_RETURN; + default: +#ifdef MAP_VOID /* plain "void" -> "int" */ + if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0) + fatal("out of memory"); +#endif + PUSHBACK_AND_RETURN; + } + } + PUSHBACK_AND_RETURN; /* hit EOF */ /* NOTE(review): on this path the token text is left unchanged even when MAP_VOID is defined */ +} + +#endif + +/* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */ + +static void tok_time(t) +struct token *t; +{ + long now; + char *cp; + char buf[BUFSIZ]; + + /* + * Using sprintf() to select parts of a string is gross, but this should + * be fast enough. + */ + + (void) time(&now); + cp = ctime(&now); /* NOTE(review): now is long although ISO C time() and ctime() take a time_t pointer, and cp is used without a null check */ + sprintf(buf, "\"%.8s\"", cp + 11); /* offset 11 is where hh:mm:ss starts in the fixed ctime() layout */ + if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) + fatal("out of memory"); + t->tokno = buf[0]; /* buf[0] is the opening double quote written by sprintf(), the tokno that tok_class() handles as a string */ +} + +/* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */ + +static void tok_date(t) +struct token *t; +{ + long now; + char *cp; + char buf[BUFSIZ]; + + /* + * Using sprintf() to select parts of a string is gross, but this should + * be fast enough. 
+ */ + + (void) time(&now); + cp = ctime(&now); /* NOTE(review): now is long although ISO C time() and ctime() take a time_t pointer, and cp is used without a null check */ + sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20); /* offsets 4, 8 and 20 are the month, day and year fields of the fixed ctime() layout */ + if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) + fatal("out of memory"); + t->tokno = buf[0]; +} + +/* tok_unget - push back one or more possibly composite tokens */ + +void tok_unget(t) +register struct token *t; +{ + register struct token *next; + + do { /* NOTE(review): t is dereferenced before any test, so callers must pass a non-null token */ + next = t->next; /* saved first because TOK_PREPEND overwrites t->next */ + TOK_PREPEND(tok_buf, t); + } while (t = next); } /* tok_list_append - append data to list */ |