/*++
/* NAME
/*	tok_class 3
/* SUMMARY
/*	token classification
/* PACKAGE
/*	unproto
/* SYNOPSIS
/*	#include "token.h"
/*
/*	void tok_unget(t)
/*	struct token *t;
/*
/*	struct token *tok_class()
/* DESCRIPTION
/*	tok_class() collects single and composite tokens, and
/*	recognizes keywords.
/*	At present, the only composite tokens are ()-delimited,
/*	comma-separated lists, and non-whitespace tokens with attached
/*	whitespace or comment tokens.
/*
/*	Source transformations are: __DATE__ and __TIME__ are rewritten
/*	to string constants with the current date and time, respectively.
/*	Multiple string constants are concatenated. Optionally, "void *" 
/*	is mapped to "char *", and plain "void" to "int".
/*
/*	tok_unget() implements an arbitrary amount of token pushback.
/*	Only tokens obtained through tok_class() should be given to
/*	tok_unget(). This function accepts a list of tokens in 
/*	last-read-first order.
/* DIAGNOSTICS
/*	The code complains if input terminates in the middle of a list.
/* BUGS
/*	Does not preserve white space at the beginning of a list element
/*	or after the end of a list.
/* AUTHOR(S)
/*	Wietse Venema
/*	Eindhoven University of Technology
/*	Department of Mathematics and Computer Science
/*	Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
/* LAST MODIFICATION
/*	92/01/15 21:53:02
/* VERSION/RELEASE
/*	1.4
/*--*/

/*
 * Version identification string for this source file. The leading "@(#)"
 * marker follows the SCCS what-string convention; nothing in this file
 * reads the variable.
 */
static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02";

/* C library */

#include <stdio.h>

extern char *strcpy();
extern long time();
extern char *ctime();

/* Application-specific stuff */

#include "error.h"
#include "vstring.h"
#include "token.h"
#include "symbol.h"

/*
 * Forward declarations of the file-local helpers that are defined further
 * down. They are old-style (non-prototype) declarations, so the compiler
 * does not check argument types at the call sites.
 */
static struct token *tok_list();	/* collect a ()-delimited list */
static void tok_list_struct();		/* collect {}-delimited stuff in list */
static void tok_list_append();		/* append token to head/tail chain */
static void tok_strcat();		/* concatenate string constants */
static void tok_time();			/* rewrite __TIME__ keyword */
static void tok_date();			/* rewrite __DATE__ keyword */
static void tok_space_append();		/* attach or discard blank token */

/* tok_void() exists only when one of the void mapping options is enabled. */
#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
static void tok_void();			/* rewrite void keyword */
#endif

/*
 * Head of the chain of pushed-back tokens, linked through their next
 * fields. tok_unget() adds to it; tok_class() takes from it.
 */
static struct token *tok_buf = 0;	/* token push-back storage */

/*
 * TOK_PREPEND - add token to LIFO queue, return head
 *
 * Links token t in front of the chain whose head is kept in the lvalue
 * "list", stores t as the new head, and yields that new head as the value
 * of the expression. Every parameter is parenthesized so that expression
 * arguments (for example "p + 1") expand correctly. Both arguments are
 * evaluated twice, so they must be free of side effects.
 */

#define TOK_PREPEND(list,t) ((t)->next = (list), (list) = (t))

/* tok_space_append - tack blank token onto a token, or throw it away */

static void tok_space_append(list, t)
register struct token *list;
register struct token *t;
{

    /*
     * "list" is the token that the blank or comment token "t" should be
     * attached to; it may be null when there is no such token yet.
     * 
     * The head/tail fields of a token serve three purposes: they chain the
     * members of a (list), they chain the non-blank tokens of one list
     * member, and they chain the whitespace and comment tokens that trail a
     * non-blank token.
     * 
     * Blank tokens are kept only by hanging them off a preceding non-blank
     * token. Two situations therefore lose the blank token:
     * 
     * - there is no preceding token (start of a list member), so there is
     * nothing to hang it on;
     * 
     * - the preceding token is itself a (list), whose head/tail fields are
     * already in use for the list members.
     * 
     * Newlines never get here; the callers drop them and leave it to the
     * output routines to regenerate them.
     */

    if (list != 0 && list->tokno != TOK_LIST) {
	tok_list_append(list, t);
    } else {
	tok_free(t);
    }
}

/* tok_class - return next single token, keyword, or composite token */

struct token *tok_class()
{
    register struct token *tp;
    register struct symbol *sym;

    /*
     * Pushed-back tokens take priority over fresh input. They went through
     * this routine once already, so they are handed out as they are, after
     * being unlinked from the push-back chain.
     */

    if ((tp = tok_buf) != 0) {
	tok_buf = tp->next;
	tp->next = 0;
	return (tp);
    }

    /* Nothing pushed back: read a new token. A null result is passed on. */

    if ((tp = tok_get()) == 0)
	return (tp);

    /* Canonicalize the new token according to its type. */

    if (tp->tokno == '(') {			/* beginning of list */
	tp = tok_list(tp);
    } else if (tp->tokno == '"') {		/* string, look for more */
	tok_strcat(tp);
    } else if (tp->tokno == TOK_WORD) {		/* may be a keyword */
	if ((sym = sym_find(tp->vstr->str)) != 0) {
	    switch (sym->type) {
	    case TOK_DATE:			/* __DATE__ becomes a string */
		tok_date(tp);
		tok_strcat(tp);			/* more strings may follow */
		break;
	    case TOK_TIME:			/* __TIME__ becomes a string */
		tok_time(tp);
		tok_strcat(tp);			/* more strings may follow */
		break;
#if defined(MAP_VOID_STAR) || defined(MAP_VOID)
	    case TOK_VOID:			/* optional void rewriting */
		tok_void(tp);
		break;
#endif
	    default:				/* any other keyword */
		tp->tokno = sym->type;
		break;
	    }
	}
    }
    return (tp);
}

/* tok_list - gather a ()-delimited, comma-separated list into one token */

static struct token *tok_list(t)
struct token *t;
{
    char   *open_path = t->path;	/* where the '(' was seen, */
    int     open_line = t->line;	/* kept for the diagnostic */
    register struct token *list;

    /*
     * Set up the composite token. Its head/tail chain holds the list
     * members; the '(' token itself starts the first member.
     */

    list = tok_alloc();
    list->tokno = TOK_LIST;
    list->path = t->path;
    list->line = t->line;
    list->head = list->tail = t;
#ifdef DEBUG
    strcpy(list->vstr->str, "LIST");
#endif

    /*
     * Consume tokens up to and including the matching ')'. A ',' or the
     * final ')' becomes a new member of the list; anything else is chained
     * onto the current member (list->tail). Material between '{' and '}'
     * is collected by tok_list_struct(). Blanks are tacked onto the most
     * recent token of the current member where possible; newlines are
     * dropped because the output routines regenerate them.
     */

    while ((t = tok_class()) != 0) {
	if (t->tokno == ')') {			/* end of list */
	    tok_list_append(list, t);
	    return (list);
	}
	if (t->tokno == ',') {			/* list separator */
	    tok_list_append(list, t);
	} else if (t->tokno == '{') {		/* struct/union type */
	    tok_list_struct(list->tail, t);
	} else if (t->tokno == TOK_WSPACE) {	/* keep trailing blanks */
	    tok_space_append(list->tail->tail, t);
	} else if (t->tokno == '\n') {		/* fix newlines later */
	    tok_free(t);
	} else {				/* part of current member */
	    tok_list_append(list->tail, t);
	}
    }

    /* Input ran out before the ')'. Return what was collected so far. */

    error_where(open_path, open_line, "unmatched '('");
    return (list);
}

/* tok_list_struct - gather {}-delimited material inside a list member */

static void tok_list_struct(list, t)
register struct token *list;
register struct token *t;
{

    /*
     * Remember where the '{' was seen, for the diagnostic below. The list
     * collector calls this routine; when the '}' never shows up, the list
     * collector will not find its ')' either.
     */

    char   *brace_path = t->path;
    int     brace_line = t->line;

    tok_list_append(list, t);			/* the '{' itself */

    /*
     * Chain everything up to and including the matching '}' onto the list
     * member. Nested braces are handled by recursion. Blanks are tacked
     * onto the preceding token where possible; newlines are dropped
     * because the output routines regenerate them.
     */

    while ((t = tok_class()) != 0) {
	switch (t->tokno) {
	case '\n':				/* fix newlines later */
	    tok_free(t);
	    continue;
	case TOK_WSPACE:			/* keep trailing blanks */
	    tok_space_append(list->tail, t);
	    continue;
	case '{':				/* nested braces */
	    tok_list_struct(list, t);
	    continue;
	default:				/* '}' and everything else */
	    break;
	}
	tok_list_append(list, t);
	if (t->tokno == '}')			/* done */
	    return;
    }
    error_where(brace_path, brace_line, "unmatched '{'");
}

/* tok_strcat - glue a following string constant onto string token t1 */

static void tok_strcat(t1)
register struct token *t1;
{
    register struct token *skipped = 0;	/* blanks/newlines read past */
    register struct token *tp;
    char   *closing_quote;

    /*
     * Look ahead beyond whitespace, comments and newlines. When the next
     * real token is a string, its text (minus the opening quote) is copied
     * over the closing quote of t1 and the string token is released. Any
     * other token is pushed back untouched. In all cases, including end of
     * input, the skipped tokens are pushed back afterwards so that no
     * information is lost.
     * 
     * NOTE(review): strlen() is used without a declaration that is visible
     * in this file; confirm that one of the included headers supplies it.
     */

    for (;;) {
	if ((tp = tok_class()) == 0)		/* hit EOF */
	    break;
	if (tp->tokno == TOK_WSPACE || tp->tokno == '\n') {
	    TOK_PREPEND(skipped, tp);		/* read past it */
	    continue;
	}
	if (tp->tokno == '"') {			/* another string: join */
	    closing_quote = t1->vstr->str + strlen(t1->vstr->str) - 1;
	    if (vs_strcpy(t1->vstr, closing_quote, tp->vstr->str + 1) == 0)
		fatal("out of memory");
	    tok_free(tp);
	} else {				/* something else */
	    tok_unget(tp);
	}
	break;
    }
    if (skipped)
	tok_unget(skipped);
}

#if defined(MAP_VOID_STAR) || defined(MAP_VOID)

/* tok_void - rewrite "void" for compilers that cannot handle it */

static void tok_void(t)
register struct token *t;
{
    register struct token *seen = 0;	/* everything read ahead */
    register struct token *tp;

    /*
     * Peek beyond whitespace, comments and newlines at the first real
     * token after "void". A '*' turns "void" into "char". Anything else
     * turns "void" into "int", but only when MAP_VOID is defined. Every
     * token that was read ahead, the deciding one included, is pushed back
     * afterwards.
     * 
     * XXX This also rewrites the (void) argument list, which has to be
     * dealt with later on. Adding TOK_VOID cases all over unproto.c was
     * judged too error-prone an alternative.
     */

    while ((tp = tok_class()) != 0) {
	TOK_PREPEND(seen, tp);
	if (tp->tokno == TOK_WSPACE || tp->tokno == '\n')
	    continue;				/* read past blanks */
	if (tp->tokno == '*') {			/* "void *" -> "char *" */
	    if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0)
		fatal("out of memory");
	} else {
#ifdef MAP_VOID					/* plain "void" -> "int" */
	    if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0)
		fatal("out of memory");
#endif
	}
	break;
    }
    if (seen)					/* also after EOF */
	tok_unget(seen);
}

#endif

/* tok_time - replace __TIME__ token text by "hh:mm:ss" string constant */

static void tok_time(t)
struct token *t;
{
    char    quoted[BUFSIZ];
    char   *stamp;
    long    secs;

    /*
     * Take the eight characters that start at offset 11 of the ctime()
     * result and wrap them in double quotes. Cutting up a string with
     * sprintf() is not pretty, but it is fast enough here.
     * 
     * NOTE(review): the clock value is kept in a long, in line with the
     * "extern long time()" declaration in this file; this presumably
     * assumes that time_t is a long on the target system. Confirm.
     */

    (void) time(&secs);
    stamp = ctime(&secs);
    sprintf(quoted, "\"%.8s\"", &stamp[11]);

    /* Overwrite the token text and turn the token into a string token. */

    if (vs_strcpy(t->vstr, t->vstr->str, quoted) == 0)
	fatal("out of memory");
    t->tokno = quoted[0];
}

/* tok_date - replace __DATE__ token text by "Mmm dd yyyy" string constant */

static void tok_date(t)
struct token *t;
{
    char    quoted[BUFSIZ];
    char   *stamp;
    long    secs;

    /*
     * Pick three pieces out of the ctime() result: three characters at
     * offset 4, two at offset 8 and four at offset 20, separated by single
     * blanks and wrapped in double quotes. Cutting up a string with
     * sprintf() is not pretty, but it is fast enough here.
     * 
     * NOTE(review): the clock value is kept in a long, in line with the
     * "extern long time()" declaration in this file; this presumably
     * assumes that time_t is a long on the target system. Confirm.
     */

    (void) time(&secs);
    stamp = ctime(&secs);
    sprintf(quoted, "\"%.3s %.2s %.4s\"", &stamp[4], &stamp[8], &stamp[20]);

    /* Overwrite the token text and turn the token into a string token. */

    if (vs_strcpy(t->vstr, t->vstr->str, quoted) == 0)
	fatal("out of memory");
    t->tokno = quoted[0];
}

/* tok_unget - push back one or more possibly composite tokens */

void    tok_unget(t)
register struct token *t;
{
    register struct token *next;

    /*
     * "t" heads a chain of tokens linked through their next fields. Each
     * token in turn is put in front of the push-back chain, which reverses
     * the order: the token at the end of the argument chain is the first
     * one that tok_class() will hand out again. A chain given in
     * last-read-first order therefore comes back in the order in which it
     * was originally read.
     * 
     * The successor is saved before linking, because linking overwrites the
     * next field. A null argument is accepted and does nothing, so callers
     * need no guard of their own.
     */

    while (t != 0) {
	next = t->next;
	TOK_PREPEND(tok_buf, t);
	t = next;
    }
}

/* tok_list_append - add token t at the end of the head/tail chain of h */

static void tok_list_append(h, t)
struct token *h;
struct token *t;
{

    /*
     * With an empty chain the new token becomes the first element;
     * otherwise it is linked behind the current last element. Either way
     * it ends up as the new last element. The next field of t itself is
     * left as it is.
     */

    if (h->head != 0)
	h->tail->next = t;
    else
	h->head = t;
    h->tail = t;
}