/*
 * path: root/unproto/tok_class.c
 * blob: 38ccd0d76e1e7376fa2a3c0f62d30824887e0d42 (plain)
 */
/*++
/* NAME
/*	tok_class 3
/* SUMMARY
/*	token classification
/* PACKAGE
/*	unproto
/* SYNOPSIS
/*	#include "token.h"
/*
/*	void tok_unget(t)
/*	struct token *t;
/*
/*	struct token *tok_class()
/* DESCRIPTION
/*	tok_class() collects single and composite tokens, and
/*	recognizes keywords.
/*	At present, the only composite tokens are ()-delimited,
/*	comma-separated lists, and non-whitespace tokens with attached
/*	whitespace or comment tokens.
/*
/*	Source transformations are: __DATE__ and __TIME__ are rewritten
/*	to string constants with the current date and time, respectively.
/*	Multiple string constants are concatenated. Optionally, "void *" 
/*	is mapped to "char *", and plain "void" to "int".
/*
/*	tok_unget() implements an arbitrary amount of token pushback.
/*	Only tokens obtained through tok_class() should be given to
/*	tok_unget(). This function accepts a list of tokens in 
/*	last-read-first order.
/* DIAGNOSTICS
/*	The code complains if input terminates in the middle of a list.
/* BUGS
/*	Does not preserve white space at the beginning of a list element
/*	or after the end of a list.
/* AUTHOR(S)
/*	Wietse Venema
/*	Eindhoven University of Technology
/*	Department of Mathematics and Computer Science
/*	Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
/* LAST MODIFICATION
/*	92/01/15 21:53:02
/* VERSION/RELEASE
/*	1.4
/*--*/

/*
 * SCCS identification string: file name, release and time stamp of this
 * source file. The "@(#)" prefix is the pattern that the what(1) utility
 * searches for. Nothing in this file reads the variable.
 */
static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02";

/* C library */

#include <stdio.h>

extern char *strcpy();
extern long time();
extern char *ctime();

/* Application-specific stuff */

#include "error.h"
#include "vstring.h"
#include "token.h"
#include "symbol.h"

/* Forward declarations of the file-local helpers defined further down. */

static struct token *tok_list();	/* collect ()-delimited list */
static void tok_list_struct();		/* collect {}-delimited stuff in list */
static void tok_list_append();		/* append to a head/tail chain */
static void tok_strcat();		/* concatenate string constants */
static void tok_time();			/* rewrite __TIME__ to string */
static void tok_date();			/* rewrite __DATE__ to string */
static void tok_space_append();		/* keep or discard blanks/comments */

#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
static void tok_void();			/* rewrite void keyword */
#endif

/*
 * Token push-back storage: head of a chain linked through the "next" field,
 * or 0 when nothing is pushed back. tok_unget() prepends to this chain and
 * tok_class() takes tokens off its head.
 */
static struct token *tok_buf = 0;	/* token push-back storage */

/*
 * TOK_PREPEND - add token to LIFO queue, return head
 *
 * "list" must be a modifiable pointer lvalue; "t" becomes the new head and
 * is also the value of the expression. Both arguments are parenthesized so
 * that expressions such as &tok or a conditional can be passed safely. Each
 * argument is evaluated twice, so do not pass expressions with side effects.
 */

#define TOK_PREPEND(list,t) ((t)->next = (list), (list) = (t))

/* tok_space_append - tack blanks/comments onto a token, or discard them */

static void tok_space_append(list, t)
register struct token *list;
register struct token *t;
{

    /*
     * The head/tail fields of a token serve three purposes: they chain the
     * members of a (list), they chain the non-blank tokens of one list
     * member, and they attach whitespace and comment tokens to a non-blank
     * token.
     * 
     * Inside a (list), blanks and comments always ride along with the
     * non-blank token that precedes them, which keeps later parsing simple.
     * When a list member starts with blanks there is no such token ("list"
     * is null), so the blanks are thrown away.
     * 
     * When the preceding token is itself a (list), its head/tail fields are
     * already taken by the list members, so blanks that follow it are thrown
     * away as well.
     * 
     * Newlines never get here; the callers in this file free them directly.
     */

    if (list != 0 && list->tokno != TOK_LIST) {
	tok_list_append(list, t);
	return;
    }
    tok_free(t);
}

/* tok_class - hand out the next single, keyword or composite token */

struct token *tok_class()
{
    register struct token *t;
    register struct symbol *s;

    /*
     * Pushed-back tokens come first. They went through this routine before,
     * so they are returned as they are, detached from the push-back chain.
     */

    if ((t = tok_buf) != 0) {
	tok_buf = t->next;
	t->next = 0;
	return (t);
    }

    /* Nothing pushed back: read a fresh token; a null result is passed on. */

    if ((t = tok_get()) == 0)
	return (t);

    /* Canonicalize the fresh token. */

    if (t->tokno == '(') {			/* beginning of list */
	t = tok_list(t);
    } else if (t->tokno == '"') {		/* string, look for more */
	tok_strcat(t);
    } else if (t->tokno == TOK_WORD
	       && (s = sym_find(t->vstr->str)) != 0) {	/* known keyword */
	switch (s->type) {
	case TOK_DATE:				/* map __DATE__ to string */
	    tok_date(t);
	    tok_strcat(t);			/* look for more strings */
	    break;
	case TOK_TIME:				/* map __TIME__ to string */
	    tok_time(t);
	    tok_strcat(t);			/* look for more strings */
	    break;
#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
	case TOK_VOID:				/* optionally map void types */
	    tok_void(t);
	    break;
#endif
	default:				/* other keyword */
	    t->tokno = s->type;
	    break;
	}
    }
    return (t);
}

/* tok_list - build a TOK_LIST token from a ()-delimited, comma-separated list */

static struct token *tok_list(t)
struct token *t;
{
    register struct token *list;
    char   *open_path = t->path;	/* where the '(' was seen, */
    int     open_line = t->line;	/* kept for the diagnostic below */

    /*
     * The list token takes over the position of the '(' token. The '(' token
     * itself becomes the first entry of the list's head/tail chain; the
     * tokens of the first list member will hang off that entry.
     */

    list = tok_alloc();
    list->tokno = TOK_LIST;
    list->path = open_path;
    list->line = open_line;
    list->head = t;
    list->tail = t;
#ifdef DEBUG
    strcpy(list->vstr->str, "LIST");
#endif

    /*
     * Consume tokens up to and including the matching ')'. Each ',' token
     * starts a new list member: it is appended to the list itself, and the
     * tokens that follow are appended to the most recent '(' or ',' entry
     * (list->tail). Structured stuff between '{' and '}' is collected by a
     * separate routine. Blanks are kept where possible. Newlines are freed
     * here so that they do not upset the layout of rewritten argument lists;
     * per the original notes, the output routines regenerate them.
     */

    for (;;) {
	if ((t = tok_class()) == 0)
	    break;				/* input ran out */
	switch (t->tokno) {
	case '\n':				/* fix newlines later */
	    tok_free(t);
	    break;
	case TOK_WSPACE:			/* preserve trailing blanks */
	    tok_space_append(list->tail->tail, t);	/* except after list */
	    break;
	case '{':				/* struct/union type */
	    tok_list_struct(list->tail, t);
	    break;
	case ',':				/* list separator */
	    tok_list_append(list, t);
	    break;
	case ')':				/* end of list */
	    tok_list_append(list, t);
	    return (list);
	default:				/* part of current member */
	    tok_list_append(list->tail, t);
	    break;
	}
    }
    error_where(open_path, open_line, "unmatched '('");
    return (list);				/* do not waste any data */
}

/* tok_list_struct - collect a {}-delimited group into the current list member */

static void tok_list_struct(list, t)
register struct token *list;
register struct token *t;
{
    char   *open_path = t->path;	/* where the '{' was seen, */
    int     open_line = t->line;	/* kept for the diagnostic below */

    /*
     * On entry "t" is the '{' token and "list" is the entry whose head/tail
     * chain receives the tokens. The caller is the routine that collects
     * list members, so when the '}' is missing that routine will not find
     * its closing ')' either.
     */

    tok_list_append(list, t);

    /*
     * Append everything up to and including the matching '}'. A nested '{'
     * is handled by recursion, with its tokens going onto the same chain.
     * Blanks are kept where possible; newlines are freed so that they do not
     * interfere when argument lists are rewritten.
     */

    while ((t = tok_class()) != 0) {
	if (t->tokno == TOK_WSPACE) {		/* preserve trailing blanks */
	    tok_space_append(list->tail, t);	/* except after list */
	} else if (t->tokno == '\n') {		/* fix newlines later */
	    tok_free(t);
	} else if (t->tokno == '{') {		/* recurse */
	    tok_list_struct(list, t);
	} else {				/* '}' or anything else */
	    tok_list_append(list, t);
	    if (t->tokno == '}')
		return;				/* done */
	}
    }
    error_where(open_path, open_line, "unmatched '{'");
}

/* tok_strcat - merge a following string constant into this one */

static void tok_strcat(t1)
register struct token *t1;
{
    register struct token *t2;
    register struct token *lookahead = 0;

    /*
     * Look past blanks, comments and newlines, remembering them on a private
     * chain. When the first other token is a string, its text (minus the
     * opening quote) is copied over the closing quote of "t1" and the token
     * is freed. Any other token is pushed back untouched. In both cases, and
     * also at end of input, the remembered blanks and newlines are pushed
     * back afterwards so that as little information as possible is lost.
     * 
     * A string token freshly read by tok_class() has itself been through
     * this routine, so it already includes the strings that followed it.
     * 
     * NOTE(review): no declaration of strlen() is visible in this file;
     * presumably one of the included headers supplies it -- confirm.
     */

    while ((t2 = tok_class()) != 0) {
	if (t2->tokno == TOK_WSPACE || t2->tokno == '\n') {
	    TOK_PREPEND(lookahead, t2);		/* read past blanks/newlines */
	    continue;
	}
	if (t2->tokno == '"') {			/* concatenate string tokens */
	    if (vs_strcpy(t1->vstr,
			  t1->vstr->str + strlen(t1->vstr->str) - 1,
			  t2->vstr->str + 1) == 0)
		fatal("out of memory");
	    tok_free(t2);
	} else {				/* something else, push back */
	    tok_unget(t2);
	}
	break;
    }
    if (lookahead)
	tok_unget(lookahead);
}

#if defined(MAP_VOID_STAR) || defined(MAP_VOID)

/* tok_void - rewrite "void" for compilers that have problems with it */

static void tok_void(t)
register struct token *t;
{
    register struct token *t2;
    register struct token *lookahead = 0;

    /*
     * Peek beyond blanks, comments and newlines at the first token that
     * follows "void". When that token is a '*', the text of "t" is replaced
     * by "char". For any other token the text is replaced by "int", but only
     * when MAP_VOID is defined. At end of input nothing is replaced. Every
     * token that was read ahead, including the one that ended the search, is
     * pushed back.
     * 
     * XXX This also rewrites the (void) argument list, which has to be
     * accounted for later on. The original notes call the alternative,
     * adding TOK_VOID cases all over unproto.c, too error-prone.
     */

    while ((t2 = tok_class()) != 0) {
	TOK_PREPEND(lookahead, t2);
	if (t2->tokno == TOK_WSPACE || t2->tokno == '\n')
	    continue;				/* read past blanks/newlines */
	if (t2->tokno == '*') {			/* "void *" -> "char *" */
	    if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0)
		fatal("out of memory");
	} else {
#ifdef MAP_VOID					/* plain "void" -> "int" */
	    if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0)
		fatal("out of memory");
#endif
	}
	break;
    }
    if (lookahead)
	tok_unget(lookahead);
}

#endif

/* tok_time - turn the __TIME__ token into a "hh:mm:ss" string constant */

static void tok_time(t)
struct token *t;
{
    char    quoted[BUFSIZ];
    char   *stamp;
    long    secs;

    /*
     * Take eight characters starting at offset 11 of the ctime() result and
     * wrap them in double quotes. Letting sprintf() do the slicing is not
     * pretty, but it is fast enough for this purpose.
     * 
     * NOTE(review): "secs" is a long because this file declares time() as
     * returning long; this presumably relies on time_t being a long on the
     * target platform -- confirm.
     */

    (void) time(&secs);
    stamp = ctime(&secs);
    sprintf(quoted, "\"%.8s\"", stamp + 11);

    /* Replace the token text and give the token the type of a string. */

    if (vs_strcpy(t->vstr, t->vstr->str, quoted) == 0)
	fatal("out of memory");
    t->tokno = quoted[0];
}

/* tok_date - turn the __DATE__ token into a "Mmm dd yyyy" string constant */

static void tok_date(t)
struct token *t;
{
    char    quoted[BUFSIZ];
    char   *stamp;
    long    secs;

    /*
     * Pick three pieces out of the ctime() result: three characters at
     * offset 4, two at offset 8 and four at offset 20. Join them with single
     * blanks and wrap the result in double quotes. Letting sprintf() do the
     * slicing is not pretty, but it is fast enough for this purpose.
     * 
     * NOTE(review): "secs" is a long because this file declares time() as
     * returning long; this presumably relies on time_t being a long on the
     * target platform -- confirm.
     */

    (void) time(&secs);
    stamp = ctime(&secs);
    sprintf(quoted, "\"%.3s %.2s %.4s\"", stamp + 4, stamp + 8, stamp + 20);

    /* Replace the token text and give the token the type of a string. */

    if (vs_strcpy(t->vstr, t->vstr->str, quoted) == 0)
	fatal("out of memory");
    t->tokno = quoted[0];
}

/* tok_unget - push back one or more possibly composite tokens */

void    tok_unget(t)
register struct token *t;
{
    register struct token *next;

    /*
     * "t" is the head of a chain linked through the "next" field. Each token
     * is moved to the front of the push-back storage in chain order, so the
     * last token of the chain is the first one that tok_class() hands out
     * again; hence the chain must be given in last-read-first order.
     * 
     * The successor is saved before TOK_PREPEND overwrites t->next. The loop
     * tests the pointer before using it, so an empty (null) chain is simply
     * ignored instead of being dereferenced.
     */

    while (t != 0) {
	next = t->next;
	TOK_PREPEND(tok_buf, t);
	t = next;
    }
}

/* tok_list_append - add token "t" at the end of the head/tail chain of "h" */

static void tok_list_append(h, t)
struct token *h;
struct token *t;
{

    /*
     * An empty chain (null head) starts with "t"; otherwise "t" is linked
     * behind the current tail. Either way "t" becomes the new tail. The
     * "next" field of "t" itself is left alone.
     */

    if (h->head != 0)
	h->tail->next = t;
    else
	h->head = t;
    h->tail = t;
}