1 files changed, 464 insertions, 0 deletions
diff --git a/unproto/tok_io.c b/unproto/tok_io.c
new file mode 100644
index 0000000..ab1129b
--- /dev/null
+++ b/unproto/tok_io.c
@@ -0,0 +1,464 @@
+/*++
+/* NAME
+/*	tok_io 3
+/* SUMMARY
+/*	token I/O
+/* PACKAGE
+/*	unproto
+/* SYNOPSIS
+/*	#include "token.h"
+/*
+/*	struct token *tok_get(skip_flag)
+/*	int skip_flag;
+/*
+/*	void tok_unget(t)
+/*	struct token *t;
+/*
+/*	void tok_flush(t)
+/*	struct token *t;
+/*
+/*	void tok_show(t)
+/*	struct token *t;
+/*
+/*	void put_str(s)
+/*	char *s;
+/*
+/*	void put_ch(c)
+/*	int c;
+/*
+/*	void show_line_control()
+/*
+/*	char curr_path[];
+/*	int curr_line;
+/* DESCRIPTION
+/*	These functions read from stdin and write to stdout. The
+/*	output functions maintain some memory so that two successive
+/*	words will always be separated by white space.
+/*
+/*	The input routines eliminate backslash-newline from the input.
+/*
+/*	tok_get() reads the next token from standard input. It returns
+/*	a null pointer when the end of input is reached. If the skip_flag
+/*	argument is nonzero, white space (except newline) will be skipped.
+/*
+/*	tok_unget() implements a limited amount of token push back.
+/*
+/*	tok_show() displays the contents of a (possibly composite) token
+/*	on the standard output.
+/*
+/*	tok_flush() displays the contents of a (possibly composite) token
+/*	on the standard output and makes it available for re-use.
+/*
+/*	put_str() writes a null-terminated string to standard output.
+/*
+/*	put_ch() writes one character to standard output.
+/*
+/*	show_line_control() displays the line number of the next line
+/*	to be written to standard output, in a format suitable for the C
+/*	compiler parser phase.
+/*
+/*	The curr_path[] and curr_line variables contain the input file name and 
+/*	line number of the most recently read token.
+/* BUGS
+/*	The tokenizer is just good enough for the unproto filter.
+/*	As a benefit, it is quite fast.
+/* AUTHOR(S)
+/*	Wietse Venema
+/*	Eindhoven University of Technology
+/*	Department of Mathematics and Computer Science
+/*	Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
+/* LAST MODIFICATION
+/*	91/11/30 21:10:26
+/* VERSION/RELEASE
+/*	1.2
+/*--*/
+
+static char io_sccsid[] = "@(#) tok_io.c 1.2 91/11/30 21:10:26";
+
+/* C library */
+
+#include <stdio.h>
+#include <ctype.h>
+
+extern char *strchr();
+extern char *malloc();
+extern char *realloc();
+extern char *strcpy();
+
+/* Application-specific stuff */
+
+#include "token.h"
+#include "vstring.h"
+#include "error.h"
+
+/* Stuff to keep track of original source file name and position */
+
+char    curr_path[BUFSIZ];		/* current file name */
+int     curr_line = 0;			/* # of last read line */
+
+/* Forward declarations */
+
+static void read_quoted();
+static void read_comment();
+
+/* Buffered i/o stuff */
+
+static struct vstring *buf = 0;		/* read-ahead buffer */
+static char *bp = "";			/* buffer position */
+
+#ifdef DEBUG
+#define	INITBUF	1			/* small initial buffer size */
+#else
+#define	INITBUF BUFSIZ			/* reasonable initial buffer size */
+#endif
+
+#define	input()		(*bp ? *bp++ : next_line())
+#define	unput(c)	(*--bp = (c))
+
+#define	TOK_BUFSIZE	5		/* token push-back buffer size */
+
+static struct token *tok_buf[TOK_BUFSIZE];
+static int tok_bufpos = 0;
+
+/* Type of last token sent to output, for pretty printing */
+
+static int last_tok = 0;
+
+/* Directives that should be ignored. */
+
+#ifdef IGNORE_DIRECTIVES
+
+static char *ignore_directives[] = {
+    IGNORE_DIRECTIVES,
+    0,
+};
+
+#endif
+
+/* Modified string and ctype stuff. */
+
+#define	STREQUAL(x,y)	(*(x) == *(y) && strcmp((x),(y)) == 0)
+
+#define	ISALNUM(c)	(isalnum(c) || (c) == '_')
+#define	ISALPHA(c)	(isalpha(c) || (c) == '_')
+#define	ISSPACE(c)	(isspace(c) && c != '\n')
+#define	ISDOT(c)	(c == '.')
+
+/* Collect all characters that satisfy one condition */
+
+#define	COLLECT(v,c,cond) { \
+				register struct vstring *vs = v; \
+				register char *cp = vs->str; \
+				*cp++ = c; \
+				for (;;) { \
+				    if ((c = input()) == 0) { \
+					break; \
+				    } else if (cond) { \
+					if (VS_ADDCH(vs, cp, c) == 0) \
+					    error(1, "out of memory"); \
+				    } else { \
+					unput(c); \
+					break; \
+				    } \
+				} \
+				*cp = 0; \
+			    }
+
+/* do_control - parse control line, uses tok_get() */
+
+static int do_control()
+{
+    struct token *t1;
+    struct token *t2;
+    int     pass_thru = 1;		/* 0 = ignore, 1 = output */
+
+    (void) input();				/* skip the hash */
+
+    if (t1 = tok_get(NO_WSPACE)) {
+	switch (t1->tokno) {
+
+	    /*
+	     * In case of line number control, the remainder of the line has
+	     * the format: linenumber "pathname".
+	     */
+	case TOK_NUMBER:
+	    if (t2 = tok_get(NO_WSPACE)) {
+		if (t2->tokno == '"') {
+		    curr_line = atoi(t1->vstr->str) - 1;
+		    strcpy(curr_path, t2->vstr->str);
+		}
+		tok_free(t2);
+	    }
+	    break;
+
+#ifdef IGNORE_DIRECTIVES
+	case TOK_WORD:
+	    /* Optionally ignore other #directives, such as #pragma. */
+	    {
+		char  **cpp;
+		char   *cp = t1->vstr->str;
+
+		for (cpp = ignore_directives; *cpp; cpp++) {
+		    if (STREQUAL(cp, *cpp)) {
+			pass_thru = 0;
+			break;
+		    }
+		}
+	    }
+	    break;
+#endif
+	}
+	tok_free(t1);
+    }
+    return (pass_thru);
+}
+
+/* next_line - read one logical line, handle #control */
+
+static int next_line()
+{
+    register int c;
+    register char *cp;
+
+    /* Allocate buffer upon first entry */
+
+    if (buf == 0)
+	buf = vs_alloc(INITBUF);
+
+    for (;;) {
+	cp = buf->str;
+
+	/* Account for EOF and line continuations */
+
+	while ((c = getchar()) != EOF) {
+	    if (VS_ADDCH(buf, cp, c) == 0)	/* store character */
+		error(1, "out of memory");
+	    if (c == '\n') {			/* real end of line */
+		curr_line++;
+		break;
+	    } else if (c == '\\') {
+		if ((c = getchar()) == EOF) {	/* XXX strip backslash-EOF */
+		    break;
+		} else if (c == '\n') {		/* strip backslash-newline */
+		    curr_line++;
+		    put_ch('\n');		/* preserve line count */
+		    cp--;			/* un-store backslash */
+		} else {
+		    ungetc(c, stdin);		/* keep backslash-other */
+		}
+	    }
+	}
+	*cp = 0;
+	bp = buf->str;
+
+	/* Account for EOF and #control */
+
+	switch (bp[0]) {
+	case 0:					/* EOF */
+	    return (0);
+	case '#':				/* control */
+	    if (do_control())
+		fputs(buf->str, stdout);	/* pass through */
+	    else
+		putchar('\n');			/* filter out */
+	    break;
+	default:				/* non-control */
+	    return (input());
+	}
+    }
+}
+
+/* tok_unget - push back one token */
+
+void    tok_unget(t)
+register struct token *t;
+{
+    if (tok_bufpos >= TOK_BUFSIZE)
+	error(1, "too much pushback");
+    tok_buf[tok_bufpos++] = t;
+}
+
+/* tok_get - get next token */
+
+struct token *tok_get(skip_flag)
+int     skip_flag;
+{
+    register struct token *t;
+    register int c;
+    int     d;
+
+    /* Use push-back token, if any. */
+
+    if (tok_bufpos) {
+	t = tok_buf[--tok_bufpos];
+	return (t);
+    }
+
+    /*
+     * Get one from the pool and fill it in. The loop is here in case we
+     * should skip white-space tokens, which happens in a minority of all
+     * cases.
+     */
+
+    t = tok_alloc();
+
+    for (;;) {
+	if ((c = input()) == 0) {
+	    tok_free(t);
+	    return (0);
+	} else if (!isascii(c)) {
+	    t->vstr->str[0] = c;
+	    t->vstr->str[1] = 0;
+	    t->tokno = TOK_OTHER;
+	    return (t);
+	} else if (c == '"' || c == '\'') {
+	    read_quoted(t, c);
+	    t->tokno = c;
+	    return (t);
+	} else if (ISALPHA(c)) {
+	    COLLECT(t->vstr, c, ISALNUM(c));
+	    t->tokno = TOK_WORD;
+	    return (t);
+	} else if (isdigit(c)) {
+	    COLLECT(t->vstr, c, isdigit(c));
+	    t->tokno = TOK_NUMBER;
+	    return (t);
+	} else if (ISSPACE(c)) {
+	    COLLECT(t->vstr, c, ISSPACE(c));
+	    if (skip_flag)
+		continue;
+	    t->tokno = TOK_WSPACE;
+	    return (t);
+	} else if (ISDOT(c)) {
+	    COLLECT(t->vstr, c, ISDOT(c));
+	    t->tokno = TOK_OTHER;
+	    return (t);
+	} else {
+	    t->vstr->str[0] = c;
+	    if (c == '/') {
+		if ((d = input()) == '*') {
+		    t->vstr->str[1] = d;	/* comment */
+		    read_comment(t->vstr);
+		    if (skip_flag)
+			continue;
+		    t->tokno = TOK_WSPACE;
+		    return (t);
+		} else {
+		    unput(d);
+		}
+	    }
+	    t->vstr->str[1] = 0;
+	    t->tokno = c;
+	    return (t);
+	}
+    }
+}
+
+/* read_qouted - read string or character literal */
+
+static void read_quoted(t, ch)
+register struct token *t;
+int     ch;
+{
+    register char *cp = t->vstr->str;
+    register int c;
+
+    *cp++ = ch;
+
+    while (c = input()) {
+	if (c == '\n') {			/* newline in string */
+	    unput(c);
+	    break;
+	}
+	if (VS_ADDCH(t->vstr, cp, c) == 0)	/* store character */
+	    error(1, "out of memory");
+	if (c == ch)				/* end of string */
+	    break;
+	if (c == '\\')				/* eat next character */
+	    if ((c = input()) != 0 && VS_ADDCH(t->vstr, cp, c) == 0)
+		error(1, "out of memory");
+    }
+    *cp = 0;
+    return;
+}
+
+/* read_comment - stuff a whole comment into one huge token */
+
+static void read_comment(vs)
+register struct vstring *vs;
+{
+    register char *cp = vs->str + 2;	/* skip slash star */
+    register int c;
+    register int d;
+
+    while (c = input()) {
+	if (VS_ADDCH(vs, cp, c) == 0)
+	    error(1, "out of memory");
+	if (c == '*') {
+	    if ((d = input()) == '/') {
+		if (VS_ADDCH(vs, cp, d) == 0)
+		    error(1, "out of memory");
+		break;
+	    } else {
+		unput(d);
+	    }
+	}
+    }
+    *cp = 0;
+}
+
+/* put_str - output a string */
+
+void    put_str(s)
+char   *s;
+{
+    fputs(s, stdout);
+    last_tok = s[0];				/* XXX */
+#ifdef DEBUG
+    fflush(stdout);
+#endif
+}
+
+/* put_ch - put character */
+
+void    put_ch(c)
+int     c;
+{
+    last_tok = putchar(c);
+#ifdef DEBUG
+    fflush(stdout);
+#endif
+}
+
+/* tok_show - output (possibly composite) token */
+
+void    tok_show(t)
+struct token *t;
+{
+    register struct token *p;
+    register struct token *s;
+
+    switch (t->tokno) {
+    case TOK_LIST:
+	for (s = t->head; s; s = s->next) {
+	    put_ch(s->tokno);			/* opening paren or ',' */
+	    for (p = s->head; p; p = p->next)
+		tok_show(p);
+	}
+	put_ch(')');				/* closing paren */
+	break;
+    case TOK_WORD:
+	if (ISALPHA(last_tok))
+	    putchar(' ');
+	/* FALLTRHOUGH */
+    default:
+	fputs(t->vstr->str, stdout);		/* token contents */
+	last_tok = t->vstr->str[0];
+#ifdef DEBUG
+	fflush(stdout);
+#endif
+	if (t->head)				/* trailing blanks */
+            for (p = t->head; p; p = p->next)
+                tok_show(p);
+    }
+}