diff options
author | Robert de Bath <rdebath@poboxes.com> | 2002-06-08 09:02:15 +0200 |
---|---|---|
committer | Lubomir Rintel <lkundrak@v3.sk> | 2013-10-23 23:37:27 +0200 |
commit | 9696d7b0e1f3a1b0f5fd4a0428eb75afe8ad4ed6 (patch) | |
tree | 11028ecb8d89780260c4d0e5f6879f84ffac0b90 | |
download | dev86-9696d7b0e1f3a1b0f5fd4a0428eb75afe8ad4ed6.tar.gz |
Import orig/unproto
-rw-r--r-- | Makefile | 123 | ||||
-rw-r--r-- | README | 160 | ||||
-rw-r--r-- | acc.sh | 35 | ||||
-rw-r--r-- | cpp.sh | 35 | ||||
-rw-r--r-- | error.c | 97 | ||||
-rw-r--r-- | error.h | 6 | ||||
-rw-r--r-- | example.c | 222 | ||||
-rw-r--r-- | example.out | 271 | ||||
-rw-r--r-- | hash.c | 54 | ||||
-rw-r--r-- | stdarg.h | 85 | ||||
-rw-r--r-- | stddef.h | 23 | ||||
-rw-r--r-- | stdlib.h | 53 | ||||
-rw-r--r-- | strsave.c | 71 | ||||
-rw-r--r-- | symbol.c | 144 | ||||
-rw-r--r-- | symbol.h | 11 | ||||
-rw-r--r-- | tok_class.c | 432 | ||||
-rw-r--r-- | tok_io.c | 612 | ||||
-rw-r--r-- | tok_pool.c | 103 | ||||
-rw-r--r-- | token.h | 55 | ||||
-rw-r--r-- | unproto.1 | 152 | ||||
-rw-r--r-- | unproto.c | 999 | ||||
-rw-r--r-- | unproto5.shar | 4191 | ||||
-rw-r--r-- | varargs.c | 32 | ||||
-rw-r--r-- | vstring.c | 122 | ||||
-rw-r--r-- | vstring.h | 15 |
25 files changed, 8103 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2d7a98c --- /dev/null +++ b/Makefile @@ -0,0 +1,123 @@ +# @(#) Makefile 1.6 93/06/18 22:29:40 + +## BEGIN CONFIGURATION STUFF + +# In the unlikely case that your compiler has no hooks for alternate +# compiler passes, use a "cc cflags -E file.c | unproto >file.i" +# pipeline, then "cc cflags -c file.i" to compile the resulting +# intermediate file. +# +# Otherwise, the "/lib/cpp | unproto" pipeline can be packaged as an +# executable shell script (see the provided "cpp.sh" script) that should +# be installed as "/whatever/cpp". This script should then be specified +# to the C compiler as a non-default preprocessor. +# +# PROG = unproto +# PIPE = + +# The overhead and problems of shell script interpretation can be +# eliminated by having the unprototyper program itself open the pipe to +# the preprocessor. In that case, define the PIPE_THROUGH_CPP macro as +# the path name of the default C preprocessor (usually "/lib/cpp"), +# install the unprototyper as "/whatever/cpp" and specify that to the C +# compiler as a non-default preprocessor. +# +PROG = cpp +PIPE = -DPIPE_THROUGH_CPP=\"/lib/cpp\" + +# Some compilers complain about some #directives. The following is only a +# partial solution, because the directives are still seen by /lib/cpp. +# Be careful with filtering out #pragma, because some pre-ANSI compilers +# (SunOS) rely on its use. +# +# SKIP = -DIGNORE_DIRECTIVES=\"pragma\",\"foo\",\"bar\" +# +SKIP = + +# The bell character code depends on the character set. With ASCII, it is +# 7. Specify a string constant with exactly three octal digits. If you +# change this definition, you will have to update the example.out file. +# +BELL = -DBELL=\"007\" + +# Some C compilers have problems with "void". The nature of the problems +# depends on the age of the compiler. +# +# If your compiler does not understand "void" at all, compile with +# -DMAP_VOID. 
The unprototyper will replace "void *" by "char *", a +# (void) argument list by an empty one, and will replace all other +# instances of "void" by "int". +# +# If your compiler has problems with "void *" only, compile with +# -DMAP_VOID_STAR. The unprototyper will replace "void *" by "char *", +# and will replace a (void) argument list by an empty one. All other +# instances of "void" will be left alone. +# +# If neither of these are defined, (void) argument lists will be replaced +# by empty ones. +# +# MAP = -DMAP_VOID_STAR + +# Now that we have brought up the subject of antique C compilers, here's +# a couple of aliases that may be useful, too. +# +# ALIAS = -Dstrchr=index + +# If you need support for functions that implement ANSI-style variable +# length argument lists, edit the stdarg.h file provided with this +# package so that it contains the proper definitions for your machine. + +## END CONFIGURATION STUFF + +SHELL = /bin/sh + +CFILES = unproto.c tok_io.c tok_class.c tok_pool.c vstring.c symbol.c error.c \ + hash.c strsave.c +HFILES = error.h token.h vstring.h symbol.h +SCRIPTS = cpp.sh acc.sh +SAMPLES = stdarg.h stddef.h stdlib.h varargs.c example.c example.out +SOURCES = README $(CFILES) $(HFILES) Makefile $(SCRIPTS) $(SAMPLES) +FILES = $(SOURCES) unproto.1 +OBJECTS = tok_io.o tok_class.o tok_pool.o unproto.o vstring.o symbol.o error.o \ + hash.o strsave.o + +CFLAGS = -O $(PIPE) $(SKIP) $(BELL) $(MAP) $(ALIAS) +#CFLAGS = -O $(PIPE) $(SKIP) $(BELL) $(MAP) $(ALIAS) -p -Dstatic= +#CFLAGS = -g $(PIPE) $(SKIP) $(BELL) $(MAP) $(ALIAS) -DDEBUG + +$(PROG): $(OBJECTS) + $(CC) $(CFLAGS) -o $@ $(OBJECTS) $(MALLOC) + +# For linting, enable all bells and whistles. + +lint: + lint -DPIPE_THROUGH_CPP=\"foo\" -DIGNORE_DIRECTIVES=\"foo\",\"bar\" \ + $(BELL) -DMAP_VOID $(ALIAS) $(CFILES) + +# Testing requires that the program is compiled with -DDEBUG. 
+ +test: $(PROG) cpp example.c example.out + ./cpp example.c >example.tmp + @echo the following diff command should produce no output + diff -b example.out example.tmp + rm -f example.tmp + +shar: $(FILES) + @shar $(FILES) + +archive: + $(ARCHIVE) $(SOURCES) + +clean: + rm -f *.o core cpp unproto mon.out varargs.o varargs example.tmp + +error.o : error.c token.h error.h Makefile +hash.o : hash.c Makefile +strsave.o : strsave.c error.h Makefile +symbol.o : symbol.c error.h token.h symbol.h Makefile +tok_class.o : tok_class.c error.h vstring.h token.h symbol.h Makefile +tok_io.o : tok_io.c token.h vstring.h error.h Makefile +tok_pool.o : tok_pool.c token.h vstring.h error.h Makefile +unproto.o : unproto.c vstring.h stdarg.h token.h error.h symbol.h Makefile +varargs.o : varargs.c stdarg.h Makefile +vstring.o : vstring.c vstring.h Makefile @@ -0,0 +1,160 @@ +@(#) README 1.6 93/06/18 22:29:34 + +unproto - Compile ANSI C with traditional UNIX C compiler + +Description: +------------ + +This is a filter that sits in between the UNIX C preprocessor and the +next UNIX C compiler stage, on the fly transforming ANSI C syntax to +old C syntax. Line number information is preserved so that compiler +diagnostics still make sense. It runs at roughly the same speed as +/lib/cpp, so it has negligible impact on compilation time. + +Typically, the program is invoked by the native UNIX C compiler as an +alternate preprocessor. The unprototyper in turn invokes the native C +preprocessor and massages its output. Similar tricks can be used with +the lint(1) command. Details are given below. + +The filter rewrites ANSI-style function headings, function pointer +types and type casts, function prototypes, and combinations thereof. +Unlike some other unprototypers, this one is fully recursive and does +not depend on source file layout (see the example.c file). 
+ +Besides the rewriting of argument lists, the program does the following +transformations: string concatenation, conversion of \a and \x escape +sequences to their octal equivalents, translation of the __TIME__ and +__DATE__ macros, optional mapping of `void *' to `char *', and optional +mapping of plain `void' to `int'. + +The unprototyper provides hooks for compilers that require special +tricks for variadic functions (fortunately, many don't). <stdarg.h> +support is provided for sparc, mips, mc68k, 80x86, vax, and others. + +The program has been tested with SunOS 4.1.1 (sparc), Ultrix 4.0 and +4.2 (mips), and Microport System V Release 2 (80286). It should work +with almost every PCC-based UNIX C compiler. + +Restrictions: +------------- + +A description of restrictions and workarounds can be found in the +unproto.1 manual page. + +Problems fixed with this release: +--------------------------------- + +Prototypes and definitions of functions returning pointer to function +were not rewritten to old style. + +Operation: +---------- + +This package implements a non-default C preprocessor (the output from +the default C preprocessor being piped through the unprototyper). How +one tells the C compiler to use a non-default preprocessor program is +somewhat compiler-dependent: + + SunOS 4.x: cc -Qpath directory_with_alternate_cpp ... + + Ultrix 4.x: cc -tp -hdirectory_with_alternate_cpp -B ... + + System V.2: cc -Bdirectory_with_alternate_cpp/ -tp ... + +Examples of these, and others, can be found in the acc.sh shell script +that emulates an ANSI C compiler. Your C compiler manual page should +provide the necessary information. + +A more portable, but less efficient, approach relies on the observation +that almost every UNIX C compiler supports the -E (write preprocessor +output to stdout) and -P options (preprocess file.c into file.i). 
Just +add the following lines to your Makefiles: + + .c.o: + $(CC) $(CFLAGS) -E $*.c | unproto >$*.i # simulate -P option + $(CC) $(CFLAGS) -c $*.i + rm -f $*.i + +On some systems the lint(1) command is just a shell script, and writing +a version that uses the unprototyper should not be too hard. With SunOS +4.x, /usr/bin/lint is not a shell script, but it does accept the same +syntax as the cc(1) command for the specification of a non-default +compiler pass. + +You may have to do some research on the lint command provided with your +own machine. + +Configuration: +-------------- + +Check the contents of the `stdarg.h' file provided with this package. +This file serves a dual purpose: (1) on systems that do not provide a +stdarg.h file, it should be included by C source files that implement +ANSI-style variadic functions; (2) it is also used to configure the +unprototyper so that it emits the proper magic when it sees `...'. + +The `stdarg.h' file has support for sparc, mips, and for compilers that +pass arguments via the stack (typical for 80*86, mc68k and vax). It +gives general hints for other compilers. + +The other sample header files (stddef.h and stdlib.h) are not required +to build the unprototyper. + +The `varargs.c' file provided with this package can be used to verify +that the `stdarg.h' file has been set up correctly. + +If your C compiler has no hooks for an alternate preprocessor (the +unprototyper will be used as: `cc cflags -E file.c | unproto >file.i'), +build the `unproto' executable without the `PIPE_THROUGH_CPP' feature. +Details are given in the Makefile. + +Otherwise, the `cpp.sh' shell script can be used to set up the pipe +between the native C preprocessor and the unprototyper command. The +script assumes that the unprototyper binary is called `unproto', and +that it was compiled without the `PIPE_THROUGH_CPP' feature. See the +Makefile and the `cpp.sh' script for details and for a description of +possible problems with this approach. 
+ +The overhead and problems of shell-script interpretation can be avoided +by letting the unprototyper itself pipe its standard input through the +C preprocessor. For this mode of operation, the unprototyper binary +should be called `cpp', and the `unproto.c' source file should be +compiled with the `PIPE_THROUGH_CPP' macro defined as the absolute +pathname of the native C preprocessor (usually `/lib/cpp'). See the +Makefile for details. + +Installation: +------------- + +Install the `unproto.1' manual page in a suitable place. If your system +does not provide a `stdarg.h' file, find a suitable place for the one +provided with the unprototyper and install it there. The same goes for +the sample stddef.h and stdlib.h files; make sure that the definitions +in there apply to your environment. Most or all of the latter files are +already part of Ultrix 4.x and SunOS 4.1.1. + +The ANSI float.h and limits.h files can be generated with the config +program by Steve Pemberton (comp.sources.misc volume 10, issue 62, +available from ftp.uu.net as comp.sources.misc/volume10/config42.Z). + +If you run the unprototyper with "cc -E" just install the `unproto' +binary; the `cpp' and `acc' shell scripts will not be needed. + +If you use the `cpp' shell script to pipe the preprocessor output +through the unprototyper program, install the `unproto' binary in a +place where the `cpp' shell script can find it, and install the `cpp' +shell script in a suitable place. Edit the `acc' shell script and +install it in a suitable place. From now on, type `acc' instead of +`cc'. + +If the unprototyper itself opens the pipe to the C preprocessor (i.e. +the unprototyper was built with the `PIPE_THROUGH_CPP' macro defined), +install the `cpp' unprototyper binary in a suitable place. Edit the +`acc' shell script and install it in a suitable place. From now on, +type `acc' instead of `cc'. 
+ + Wietse Venema + wietse@wzv.win.tue.nl + Mathematics and Computing Science + Eindhoven University of Technology + The Netherlands @@ -0,0 +1,35 @@ +#!/bin/sh + +# @(#) acc.sh 1.1 93/06/18 22:29:42 +# +# Script to emulate most of an ANSI C compiler with a traditional UNIX +# C compiler. + +# INCDIR should be the directory with auxiliary include files from the +# unproto source distribution (stdarg.h, stdlib.h, stddef.h, and other +# stuff that is missing from your compilation environment). With Ultrix +# 4.[0-2] you need unproto's stdarg.h even though the system provides +# one. +# +INCDIR=. + +# CPPDIR should be the directory with the unprototyping cpp filter +# (preferably the version with the PIPE_THROUGH_CPP feature). +# +CPPDIR=. + +# DEFINES: you will want to define volatile and const, and maybe even +# __STDC__. +# +DEFINES="-Dvolatile= -Dconst= -D__STDC__" + +# Possible problem: INCDIR should be listed after the user-specified -I +# command-line options, not before them as we do here. This is a problem +# only if you attempt to redefine system libraries. +# +# Choose one of the commands below that is appropriate for your system. +# +exec cc -Qpath ${CPPDIR} -I${INCDIR} ${DEFINES} "$@" # SunOS 4.x +exec cc -tp -h${CPPDIR} -B -I${INCDIR} ${DEFINES} "$@" # Ultrix 4.2 +exec cc -Yp,${CPPDIR} -I${INCDIR} ${DEFINES} "$@" # M88 SysV.3 +exec cc -B${CPPDIR}/ -tp -I${INCDIR} ${DEFINES} "$@" # System V.2 @@ -0,0 +1,35 @@ +#!/bin/sh + +# @(#) cpp.sh 1.3 92/01/15 21:53:22 + +# Unprototyping preprocessor for pre-ANSI C compilers. On some systems, +# this script can be as simple as: +# +# /lib/cpp "$@" | unproto +# +# However, some cc(1) drivers specify output file names on the +# preprocessor command line, so this shell script must be prepared to +# intercept them. Depending on the driver program, the cpp options may +# even go before or after the file name argument(s). The script below +# tries to tackle all these cases. 
+# +# You may want to add -Ipath_to_stdarg.h_file, -Dvoid=, -Dvolatile=, +# and even -D__STDC__. + +cpp_args="" + +while : +do + case $1 in + "") break;; + -*) cpp_args="$cpp_args $1";; + *) cpp_args="$cpp_args $1" + case $2 in + ""|-*) ;; + *) exec 1> $2 || exit 1; shift;; + esac;; + esac + shift +done + +/lib/cpp $cpp_args | unproto @@ -0,0 +1,97 @@ +/*++ +/* NAME +/* error 3 +/* SUMMARY +/* diagnostics +/* PACKAGE +/* unproto +/* SYNOPSIS +/* #include "error.h" +/* +/* int errcount; +/* +/* void error(text) +/* char *text; +/* +/* void error_where(path, line, text) +/* char *path; +/* int line; +/* char *text; +/* +/* void fatal(text) +/* char *text; +/* DESCRIPTION +/* The routines in this file print a diagnostic (text). Some also +/* terminate the program. Upon each error*() call, the errcount variable +/* is incremented. +/* +/* error() provides a default context, i.e. the source-file +/* coordinate of the last read token. +/* +/* error_where() allows the caller to explicitly specify context: path +/* is a source-file name, and line is a line number. +/* +/* fatal() is like error() but terminates the program with a non-zero +/* exit status. +/* +/* context is ignored if the line number is zero or if the path +/* is an empty string. +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/01/15 21:53:10 +/* VERSION/RELEASE +/* 1.2 +/*--*/ + +static char error_sccsid[] = "@(#) error.c 1.2 92/01/15 21:53:10"; + +/* C library */ + +#include <stdio.h> + +extern void exit(); + +/* Application-specific stuff */ + +#include "token.h" +#include "error.h" + +int errcount = 0; /* error counter */ + +/* error - report problem (implicit context) */ + +void error(text) +char *text; +{ + error_where(in_path, in_line, text); +} + +/* error_where - report problem (explicit context) */ + +void error_where(path, line, text) +char *path; +int line; +char *text; +{ + errcount++; + + /* Suppress context info if there is none. */ + + if (line && path[0]) + fprintf(stderr, "%s, line %d: ", path, line); + + fprintf(stderr, "%s\n", text); +} + +/* fatal - report problem and terminate unsuccessfully */ + +void fatal(text) +char *text; +{ + error(text); + exit(1); +} @@ -0,0 +1,6 @@ +/* @(#) error.h 1.2 92/01/15 21:53:14 */ + +extern int errcount; /* error counter */ +extern void error(); /* default context */ +extern void error_where(); /* user-specified context */ +extern void fatal(); /* fatal error */ diff --git a/example.c b/example.c new file mode 100644 index 0000000..bf2f838 --- /dev/null +++ b/example.c @@ -0,0 +1,222 @@ + /* + * @(#) example.c 1.5 93/06/18 22:29:46 + * + * Examples of things that can be done with the unproto package + */ + +typedef char *charstar; + + /* + * New-style argument list with structured argument, one field being pointer + * to function returning pointer to function with function-pointer argument + */ + +x(struct { + struct { + int (*(*foo) (int (*arg1) (double))) (float arg2); + } foo; +} baz) { + return (0); +} + + /* New-style function-pointer declaration. */ + +int (*(*bar0) (float)) (int); + + /* Old-style argument list with new-style argument type. 
*/ + +baz0(bar) +int (*(*bar) (float)) (int); +{} + + /* + * New-style argument list with new-style argument type, declaration + * embedded within block. Plus a couple assignments with function calls that + * look like casts. + */ + +foo(int (*(*bar) (float)) (int)) +{ + int (*baz) (int) = (int (*) (int)) 0, + y = (y * (*baz) (y)), + *(*z) (int) = (int *(*) (int)) 0; + + struct { int (*foo)(int); } *(*s)(int) = + (struct { int (*foo)(int); } *(*)(int)) 0; + + { + y = (y * (*baz) (y)); + } + { + z = (int *(*) (int)) 0; + } + { + s = (struct { int (*foo)(int); } *(*)(int)) 0; + } + + return (0); +} + +/* Multiple declarations in one statement */ + +test1() +{ + int foo2,*(*(*bar)(int))(float),*baz(double); +} + +/* Discriminate declarations from executable statements */ + +test2(charstar y) +{ + int foo = 5,atoi(charstar); + + foo = 5,atoi(y); +} + +/* Declarations without explicit type */ + +test3,test4(int); + +test5(int y) +{ + { + test3; + } + { + test4(y); + } +} + +test6[1],test7(int); + +test7(int x) +{ + { + test6[1]; + } + { + test7(x); + } +} + +/* Checking a complicated cast */ + +struct { + struct { + int (*f)(int), o; + } bar; +} (*baz2)(int) = (struct { struct { int (*f)(int), o; } bar; } (*)(int)) 0; + +/* Distinguish things with the same shape but with different meaning */ + +test8(x) +{ + { + struct { + int foo; + } bar(charstar); + } + { + do { + int foo; + } while (x); + } +} + +/* Do not think foo(*bar) is a function pointer declaration */ + +test9(char *bar) +{ + foo(*bar); +} + +/* another couple of special-cased words. 
*/ + +test10(int x) +{ + { + int test10(int); + do test10(x); + while (x); + } + { + return test10(x); + } +} + +test11(int *x) +{ + while (*x) + (putchar(*x++)); +} + +test11a(int *x) +{ + for (*x;;) + (putchar(*x++)); +} + +/* #include directive between stuff that requires lookahead */ + +test12() +{ + char *x = "\xf\0002\002\02\2" /* foo */ +#include "/dev/null" + "\abar"; + + printf("foo" /* 1 */ "bar" /* 2 */ "baz"); + + *x = '\a'; + *x = '\xff'; +} + +int test13(void); + +/* line continuations in the middle of tokens */ + +te\ +st14(); +charstar test15 = "foo\ +bar"; +char test16 = "foo\\ +abar"; + +/* Array dimensions with unexpanded macros */ + +test17(charstar foo[bar]){} + +int (*(*test18[bar])(charstar))(charstar) = \ + (int (*(*[bar])(charstar))(charstar)) 0; + +/* Function returning pointer to function */ + +int (*(*test19(long))(int))(double); + +/* GCC accepts the following stuff, K&R C does not... */ + +void test20(int test21(double)) {} + +void test22(struct { int foo; } test23(short)) {} + +/* Do not blindly rewrite (*name(stuff))(otherstuff) */ + +void test23() +{ + int (*test24(int)) (int), + y = (*test24(2)) (3), + z = ((*test24(2)) (3)); +} + +/* Function returning pointer to function */ + +int (*(*test25(long foo))(int bar))(double baz){ /* body */ } + +int (*(*test26(foo))())() +long foo; +{ /* body */ } + +#define ARGSTR() struct {int l; char c[1];} + +void functie(ARGSTR() *cmdlin, ARGSTR() *c1) +{ +} diff --git a/example.out b/example.out new file mode 100644 index 0000000..0b14e1b --- /dev/null +++ b/example.out @@ -0,0 +1,271 @@ +# 1 "example.c" + + + + + + +typedef char *charstar; + + + + + + +x( + + + +baz) +# 14 "example.c" +struct { + struct { + int (*(*foo)())(); + } foo; +} baz; +# 18 "example.c" +{/*1*/ + /* end dcls */return (0); +}/*1*/ + + + +int (*(*bar0)())(); + + + +baz0(bar) +int (*(*bar)())(); +{/*1*/}/*1*/ + + + + + + + +foo(bar) +# 38 "example.c" +int (*(*bar)())(); +{/*1*/ + int (*baz)()= (int (*)()) 0, + y = (y * 
(*baz)(y)), + *(*z)()= (int *(*)()) 0; + + struct {/*2*/ int (*foo)(); }/*2*/ *(*s)()= + (struct { int (*foo)(); } *(*)()) 0; + + /* end dcls */{/*2*/ + y /* end dcls */= (y * (*baz)(y)); + }/*2*/ + {/*2*/ + z /* end dcls */= (int *(*)()) 0; + }/*2*/ + {/*2*/ + s /* end dcls */= (struct { int (*foo)(); } *(*)()) 0; + }/*2*/ + + return (0); +}/*1*/ + + + +test1() +{/*1*/ + int foo2,*(*(*bar)())(),*baz(); +}/*1*/ + + + +test2(y) +# 69 "example.c" +charstar y; +{/*1*/ + int foo = 5,atoi(); + + foo /* end dcls */= 5,atoi(y); +}/*1*/ + + + +test3,test4(); + +test5(y) +# 80 "example.c" +int y; +{/*1*/ + /* end dcls */{/*2*/ + test3/* end dcls */; + }/*2*/ + {/*2*/ + test4/* end dcls */(y); + }/*2*/ +}/*1*/ + +test6[1],test7(); + +test7(x) +# 92 "example.c" +int x; +{/*1*/ + /* end dcls */{/*2*/ + test6/* end dcls */[1]; + }/*2*/ + {/*2*/ + test7/* end dcls */(x); + }/*2*/ +}/*1*/ + + + +struct {/*1*/ + struct {/*2*/ + int (*f)(), o; + }/*2*/ bar; +}/*1*/ (*baz2)()= (struct { struct { int (*f)(), o; } bar; } (*)()) 0; + + + +test8(x) +{/*1*/ + /* end dcls */{/*2*/ + struct {/*3*/ + int foo; + }/*3*/ bar(); + }/*2*/ + {/*2*/ + /* end dcls */do {/*3*/ + int foo; + }/*3*/ while (x); + }/*2*/ +}/*1*/ + + + +test9(bar) +# 128 "example.c" +char *bar; +{/*1*/ + foo/* end dcls */(*bar); +}/*1*/ + + + +test10(x) +# 135 "example.c" +int x; +{/*1*/ + /* end dcls */{/*2*/ + int test10(); + /* end dcls */do test10(x); + while (x); + }/*2*/ + {/*2*/ + /* end dcls */return test10(x); + }/*2*/ +}/*1*/ + +test11(x) +# 147 "example.c" +int *x; +{/*1*/ + /* end dcls */while (*x) + (putchar(*x++)); +}/*1*/ + +test11a(x) +# 153 "example.c" +int *x; +{/*1*/ + /* end dcls */for (*x;;) + (putchar(*x++)); +}/*1*/ + + + +test12() +{/*1*/ + char *x = +# 1 "/dev/null" 1 +# 165 "example.c" 2 +# 163 "example.c" +"\017\0002\002\002\002\007bar" + + ; + + printf/* end dcls */("foobarbaz" ); + + *x = '\007'; + *x = '\377'; +}/*1*/ + +int test13(); + + + +test14(); + +charstar test15 = "foobar"; + +char 
test16 = "foo\007bar"; + + + + +test17(foo) +# 186 "example.c" +charstar foo[bar]; +# 186 "example.c" +{/*1*/}/*1*/ + +int (*(*test18[bar])())()= (int (*(*[bar])())()) 0; + + + + +int (*(*test19())())(); + + + +void test20(test21) +# 197 "example.c" +int test21(); +# 197 "example.c" +{/*1*/}/*1*/ + +void test22(test23) +# 199 "example.c" +struct { int foo; } test23(); +# 199 "example.c" +{/*1*/}/*1*/ + + + +void test23() +{/*1*/ + int (*test24())(), + y = (*test24(2)) (3), + z = ((*test24(2))(3)); +}/*1*/ + + + +int (*(*test25(foo))())() +# 212 "example.c" +long foo; +# 212 "example.c" +{/*1*/ }/*1*/ + +int (*(*test26(foo))())() +long foo; +{/*1*/ }/*1*/ + + + +void functie(cmdlin,c1) +# 220 "example.c" +struct {int l; char c[1];} *cmdlin; +# 220 "example.c" +struct {int l; char c[1];} *c1; +{/*1*/ +}/*1*/ @@ -0,0 +1,54 @@ +/*++ +/* NAME +/* hash 3 +/* SUMMARY +/* compute hash value for string +/* SYNOPSIS +/* int hash(string, size) +/* char *string; +/* int size; +/* DESCRIPTION +/* This function computes for the given null-terminated string an +/* integer hash value in the range 0..size-1. +/* SEE ALSO +/* .fi +/* Alfred V. Aho, Ravi Sethi and Jeffrey D. Ullman: Compilers: +/* principles, techniques and tools; Addison-Wesley, Amsterdam, 1986. +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* +/* Originally written by: P. J. Weinberger at Bell Labs. +/* LAST MODIFICATION +/* 92/01/15 21:53:12 +/* VERSION/RELEASE +/* %I +/*--*/ + +static char hash_sccsid[] = "@(#) hash.c 1.1 92/01/15 21:53:12"; + +/* hash - hash a string; original author: P. J. Weinberger at Bell Labs. 
*/ + +int hash(s, size) +register char *s; +unsigned size; +{ + register unsigned long h = 0; + register unsigned long g; + + /* + * For a performance comparison with the hash function presented in K&R, + * first edition, see the "Dragon" book by Aho, Sethi and Ullman. + */ + + while (*s) { + h = (h << 4) + *s++; + if (g = (h & 0xf0000000)) { + h ^= (g >> 24); + h ^= g; + } + } + return (h % size); +} diff --git a/stdarg.h b/stdarg.h new file mode 100644 index 0000000..1f8aae7 --- /dev/null +++ b/stdarg.h @@ -0,0 +1,85 @@ + /* + * @(#) stdarg.h 1.4 93/06/18 22:29:44 + * + * Sample stdarg.h file for use with the unproto filter. + * + * This file serves two purposes. + * + * 1 - On systems that do not have a /usr/include/stdarg.h file, it should be + * included by C source files that implement ANSI-style variadic functions. + * Ultrix 4.[0-2] comes with stdarg.h but still needs the one that is + * provided with the unproto filter. + * + * 2 - To configure the unprototyper itself. If the _VA_ALIST_ macro is + * defined, its value will appear in the place of the "..." at the end of + * argument lists of variadic function *definitions* (not declarations). + * Some compilers (such as Greenhills m88k) have a non-empty va_dcl + * definition in the system header file varargs.h. If that is the case, + * define "_VA_DCL_" with the same value as va_dcl. If _VA_DCL_ is defined, + * the unprototyper will emit its value just before the opening "{". + * + * Compilers that always pass arguments via the stack can use the default code + * at the end of this file (this usually applies for the vax, mc68k and + * 80*86 architectures). + * + * Special tricks are needed for compilers that pass some or all function + * arguments via registers. Examples of the latter are given for the mips + * and sparc architectures. Usually the compiler special-cases an argument + * declaration such as "va_alist" or "__builtin_va_alist". For inspiration, + * see the local /usr/include/varargs.h file. 
+ * + * You can use the varargs.c program provided with the unproto package to + * verify that the stdarg.h file has been set up correctly. + */ + +#ifdef sparc /* tested with SunOS 4.1.1 */ + +#define _VA_ALIST_ "__builtin_va_alist" +typedef char *va_list; +#define va_start(ap, p) (ap = (char *) &__builtin_va_alist) +#define va_arg(ap, type) ((type *) __builtin_va_arg_incr((type *) ap))[0] +#define va_end(ap) + +#else +#ifdef mips /* tested with Ultrix 4.0 and 4.2 */ + +#define _VA_ALIST_ "va_alist" +#include "/usr/include/stdarg.h" + +#else +#ifdef m88k /* Motorola SYSTEM V/88 R32V3 */ + +#define _VA_ALIST_ "va_alist" +#define _VA_DCL_ "va_type va_alist;" +typedef struct _va_struct { + int va_narg; + int *va_stkaddr; + int *va_iregs; +} va_list; +#define va_start(ap, p) \ +((ap).va_narg=(int *)&va_alist-va_stkarg, \ + (ap).va_stkaddr=va_stkarg, \ + (ap).va_iregs=(int *)va_intreg) +#define va_end(p) +#if defined(LittleEndian) +#define va_arg(p,mode) \ + (*(mode *)_gh_va_arg(&p, va_align(mode), va_regtyp(mode), sizeof(mode))) +#else /* defined(LittleEndian) */ +#define va_arg(p,mode) ( \ + (p).va_narg += ((p).va_narg & (va_align(mode) == 8)) + \ + (sizeof(mode)+3)/4, \ + ((mode *)((va_regtyp(mode) && (p).va_narg <= 8 ? \ + (p).va_iregs: \ + (p).va_stkaddr) + (p).va_narg))[-1]) +#endif /* defined(LittleEndian) */ + +#else /* vax, mc68k, 80*86 */ + +typedef char *va_list; +#define va_start(ap, p) (ap = (char *) (&(p)+1)) +#define va_arg(ap, type) ((type *) (ap += sizeof(type)))[-1] +#define va_end(ap) + +#endif /* m88k */ +#endif /* mips */ +#endif /* sparc */ diff --git a/stddef.h b/stddef.h new file mode 100644 index 0000000..97dbc01 --- /dev/null +++ b/stddef.h @@ -0,0 +1,23 @@ +/* @(#) stddef.h 1.1 92/02/15 17:25:46 */ + +#ifndef _stddef_h_ +#define _stddef_h_ + +/* NULL is also defined in <stdio.h> */ + +#ifndef NULL +#define NULL 0 +#endif + +/* Structure member offset - some compilers barf on this. 
*/ + +#define offsetof(type, member) ((size_t) &((type *)0)->member) + +/* Some of the following types may already be defined in <sys/types.h>. */ + +/* #include <sys/types.h> */ +/* typedef long ptrdiff_t; /* type of pointer difference */ +/* typedef unsigned short wchar_t; /* wide character type */ +/* typedef unsigned size_t; /* type of sizeof */ + +#endif /* _stddef_h_ */ diff --git a/stdlib.h b/stdlib.h new file mode 100644 index 0000000..78d99dd --- /dev/null +++ b/stdlib.h @@ -0,0 +1,53 @@ +/* @(#) stdlib.h 1.1 92/02/15 17:25:45 */ + +#ifndef _stdlib_h_ +#define _stdlib_h_ + +/* NULL is also defined in <stdio.h> */ + +#ifndef NULL +#define NULL 0 +#endif + +/* + * Some functions in this file will be missing from the typical pre-ANSI + * UNIX library. Some pre-ANSI UNIX library functions have return types + * that differ from what ANSI requires. + */ + +extern double atof(); +extern int atoi(); +extern long atol(); +extern double strtod(); +extern long strtol(); +extern unsigned long strtoul(); +extern int rand(); +extern void srand(); +extern char *calloc(); +extern char *malloc(); +extern char *realloc(); +extern void free(); +extern void abort(); +extern void exit(); +extern int atextit(); +extern int system(); +extern char *getenv(); +extern char *bsearch(); +extern void qsort(); +extern int abs(); +extern long labs(); + +typedef struct { + int quot; + int rem; +} div_t; + +typedef struct { + long quot; + long rem; +} ldiv_t; + +extern div_t div(); +extern ldiv_t ldiv(); + +#endif /* _stdlib_h_ */ diff --git a/strsave.c b/strsave.c new file mode 100644 index 0000000..c2a4b15 --- /dev/null +++ b/strsave.c @@ -0,0 +1,71 @@ +/*++ +/* NAME +/* strsave 3 +/* SUMMARY +/* maintain unique copy of a string +/* SYNOPSIS +/* char *strsave(string) +/* char *string; +/* DESCRIPTION +/* This function returns a pointer to a unique copy of its +/* argument. +/* DIAGNOSTICS +/* strsave() calls fatal() when it runs out of memory. 
+/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/01/15 21:53:13 +/* VERSION/RELEASE +/* 1.1 +/*--*/ + +static char strsave_sccsid[] = "@(#) strsave.c 1.1 92/01/15 21:53:13"; + +/* C library */ + +extern char *strcpy(); +extern char *malloc(); + +/* Application-specific stuff */ + +#include "error.h" + +#define STR_TABSIZE 100 + +struct string { + char *strval; /* unique string copy */ + struct string *next; /* next one in hash chain */ +}; + +static struct string *str_tab[STR_TABSIZE] = {0,}; + +/* More string stuff. Maybe it should go to an #include file. */ + +#define STREQ(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0) + +/* strsave - save unique copy of string */ + +char *strsave(str) +register char *str; +{ + register struct string *s; + register int where = hash(str, STR_TABSIZE); + + /* Look for existing entry. */ + + for (s = str_tab[where]; s; s = s->next) + if (STREQ(str, s->strval)) + return (s->strval); + + /* Add new entry. */ + + if ((s = (struct string *) malloc(sizeof(*s))) == 0 + || (s->strval = malloc(strlen(str) + 1)) == 0) + fatal("out of memory"); + s->next = str_tab[where]; + str_tab[where] = s; + return (strcpy(s->strval, str)); +} diff --git a/symbol.c b/symbol.c new file mode 100644 index 0000000..ce9f7d9 --- /dev/null +++ b/symbol.c @@ -0,0 +1,144 @@ +/*++ +/* NAME +/* symbol 3 +/* SUMMARY +/* rudimentary symbol table package +/* SYNOPSIS +/* #include "symbol.h" +/* +/* void sym_init() +/* +/* void sym_enter(name, type) +/* char *name; +/* int type; +/* +/* struct symbol *sym_find(name) +/* char *name; +/* DESCRIPTION +/* This is a rudimentary symbol-table package, just enough to +/* keep track of a couple of C keywords. +/* +/* sym_init() primes the table with C keywords. At present, most of +/* the keywords that have to do with types are left out. 
+/* We need a different strategy to detect type definitions because +/* we do not keep track of typedef names. +/* +/* sym_enter() adds an entry to the symbol table. +/* +/* sym_find() locates a symbol table entry (it returns 0 if +/* it is not found). +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/02/15 18:59:56 +/* VERSION/RELEASE +/* 1.4 +/*--*/ + +static char symbol_sccsid[] = "@(#) symbol.c 1.4 92/02/15 18:59:56"; + +/* C library */ + +extern char *strcpy(); +extern char *malloc(); + +/* Application-specific stuff */ + +#include "error.h" +#include "token.h" +#include "symbol.h" + +#define SYM_TABSIZE 20 + +static struct symbol *sym_tab[SYM_TABSIZE] = {0,}; + +/* More string stuff. Maybe it should go to an #include file. */ + +#define STREQ(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0) + +/* sym_enter - enter symbol into table */ + +void sym_enter(name, type) +char *name; +int type; +{ + struct symbol *s; + int where; + + if ((s = (struct symbol *) malloc(sizeof(*s))) == 0 + || (s->name = malloc(strlen(name) + 1)) == 0) + fatal("out of memory"); + (void) strcpy(s->name, name); + s->type = type; + + where = hash(name, SYM_TABSIZE); + s->next = sym_tab[where]; + sym_tab[where] = s; +} + +/* sym_find - locate symbol definition */ + +struct symbol *sym_find(name) +register char *name; +{ + register struct symbol *s; + + /* + * This function is called for almost every "word" token, so it better be + * fast. + */ + + for (s = sym_tab[hash(name, SYM_TABSIZE)]; s; s = s->next) + if (STREQ(name, s->name)) + return (s); + return (0); +} + + /* + * Initialization data for symbol table. We do not enter keywords for types. + * We use a different strategy to detect type declarations because we do not + * keep track of typedef names. 
+ */ + +struct sym { + char *name; + int tokno; +}; + +static struct sym syms[] = { + "if", TOK_CONTROL, + "else", TOK_CONTROL, + "for", TOK_CONTROL, + "while", TOK_CONTROL, + "do", TOK_CONTROL, + "switch", TOK_CONTROL, + "case", TOK_CONTROL, + "default", TOK_CONTROL, + "return", TOK_CONTROL, + "continue", TOK_CONTROL, + "break", TOK_CONTROL, + "goto", TOK_CONTROL, + "struct", TOK_COMPOSITE, + "union", TOK_COMPOSITE, + "__DATE__", TOK_DATE, + "__TIME__", TOK_TIME, +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + "void", TOK_VOID, +#endif + "asm", TOK_OTHER, + 0, +}; + +/* sym_init - enter known keywords into symbol table */ + +void sym_init() +{ + register struct sym *p; + + for (p = syms; p->name; p++) + sym_enter(p->name, p->tokno); +} + diff --git a/symbol.h b/symbol.h new file mode 100644 index 0000000..0711c1f --- /dev/null +++ b/symbol.h @@ -0,0 +1,11 @@ +/* @(#) symbol.h 1.1 91/09/22 21:21:42 */ + +struct symbol { + char *name; /* symbol name */ + int type; /* symbol type */ + struct symbol *next; +}; + +extern void sym_enter(); /* add symbol to table */ +extern struct symbol *sym_find(); /* locate symbol */ +extern void sym_init(); /* prime the table */ diff --git a/tok_class.c b/tok_class.c new file mode 100644 index 0000000..38ccd0d --- /dev/null +++ b/tok_class.c @@ -0,0 +1,432 @@ +/*++ +/* NAME +/* tok_class 3 +/* SUMMARY +/* token classification +/* PACKAGE +/* unproto +/* SYNOPSIS +/* #include "token.h" +/* +/* void tok_unget(t) +/* struct token *t; +/* +/* struct token *tok_class() +/* DESCRIPTION +/* tok_class() collects single and composite tokens, and +/* recognizes keywords. +/* At present, the only composite tokens are ()-delimited, +/* comma-separated lists, and non-whitespace tokens with attached +/* whitespace or comment tokens. +/* +/* Source transformations are: __DATE__ and __TIME__ are rewritten +/* to string constants with the current date and time, respectively. +/* Multiple string constants are concatenated. 
Optionally, "void *" +/* is mapped to "char *", and plain "void" to "int". +/* +/* tok_unget() implements an arbitrary amount of token pushback. +/* Only tokens obtained through tok_class() should be given to +/* tok_unget(). This function accepts a list of tokens in +/* last-read-first order. +/* DIAGNOSTICS +/* The code complains if input terminates in the middle of a list. +/* BUGS +/* Does not preserve white space at the beginning of a list element +/* or after the end of a list. +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/01/15 21:53:02 +/* VERSION/RELEASE +/* 1.4 +/*--*/ + +static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02"; + +/* C library */ + +#include <stdio.h> + +extern char *strcpy(); +extern long time(); +extern char *ctime(); + +/* Application-specific stuff */ + +#include "error.h" +#include "vstring.h" +#include "token.h" +#include "symbol.h" + +static struct token *tok_list(); +static void tok_list_struct(); +static void tok_list_append(); +static void tok_strcat(); +static void tok_time(); +static void tok_date(); +static void tok_space_append(); + +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +static void tok_void(); /* rewrite void keyword */ +#endif + +static struct token *tok_buf = 0; /* token push-back storage */ + +/* TOK_PREPEND - add token to LIFO queue, return head */ + +#define TOK_PREPEND(list,t) (t->next = list, list = t) + +/* tok_space_append - append trailing space except at start of or after list */ + +static void tok_space_append(list, t) +register struct token *list; +register struct token *t; +{ + + /* + * The head/tail fields of a token do triple duty. 
They are used to keep + * track of the members that make up a (list); to keep track of the + * non-blank tokens that make up one list member; and, finally, to tack + * whitespace and comment tokens onto the non-blank tokens that make up + * one list member. + * + * Within a (list), white space and comment tokens are always tacked onto + * the non-blank tokens to avoid parsing complications later on. For this + * reason, blanks and comments at the beginning of a list member are + * discarded because there is no token to tack them onto. (Well, we could + * start each list member with a dummy token, but that would mess up the + * whole unprototyper). + * + * Blanks or comments that follow a (list) are discarded, because the + * head/tail fields of a (list) are already being used for other + * purposes. + * + * Newlines within a (list) are discarded because they can mess up the + * output when we rewrite function headers. The output routines will + * regenerate discarded newlines, anyway. + */ + + if (list == 0 || list->tokno == TOK_LIST) { + tok_free(t); + } else { + tok_list_append(list, t); + } +} + +/* tok_class - discriminate single tokens, keywords, and composite tokens */ + +struct token *tok_class() +{ + register struct token *t; + register struct symbol *s; + + /* + * Use push-back token, if available. Push-back tokens are already + * canonical and can be passed on to the caller without further + * inspection. + */ + + if (t = tok_buf) { + tok_buf = t->next; + t->next = 0; + return (t); + } + /* Read a new token and canonicalize it. 
*/ + + if (t = tok_get()) { + switch (t->tokno) { + case '(': /* beginning of list */ + t = tok_list(t); + break; + case TOK_WORD: /* look up keyword */ + if ((s = sym_find(t->vstr->str))) { + switch (s->type) { + case TOK_TIME: /* map __TIME__ to string */ + tok_time(t); + tok_strcat(t); /* look for more strings */ + break; + case TOK_DATE: /* map __DATE__ to string */ + tok_date(t); + tok_strcat(t); /* look for more strings */ + break; +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + case TOK_VOID: /* optionally map void types */ + tok_void(t); + break; +#endif + default: /* other keyword */ + t->tokno = s->type; + break; + } + } + break; + case '"': /* string, look for more */ + tok_strcat(t); + break; + } + } + return (t); +} + +/* tok_list - collect ()-delimited, comma-separated list of tokens */ + +static struct token *tok_list(t) +struct token *t; +{ + register struct token *list = tok_alloc(); + char *filename; + int lineno; + + /* Save context of '(' for diagnostics. */ + + filename = t->path; + lineno = t->line; + + list->tokno = TOK_LIST; + list->head = list->tail = t; + list->path = t->path; + list->line = t->line; +#ifdef DEBUG + strcpy(list->vstr->str, "LIST"); +#endif + + /* + * Read until the matching ')' is found, accounting for structured stuff + * (enclosed by '{' and '}' tokens). Break the list up at each ',' token, + * and try to preserve as much whitespace as possible. Newlines are + * discarded so that they will not mess up the layout when we rewrite + * argument lists. The output routines will regenerate discarded + * newlines. 
+ */ + + while (t = tok_class()) { /* skip blanks */ + switch (t->tokno) { + case ')': /* end of list */ + tok_list_append(list, t); + return (list); + case '{': /* struct/union type */ + tok_list_struct(list->tail, t); + break; + case TOK_WSPACE: /* preserve trailing blanks */ + tok_space_append(list->tail->tail, t); /* except after list */ + break; + case '\n': /* fix newlines later */ + tok_free(t); + break; + case ',': /* list separator */ + tok_list_append(list, t); + break; + default: /* other */ + tok_list_append(list->tail, t); + break; + } + } + error_where(filename, lineno, "unmatched '('"); + return (list); /* do not waste any data */ +} + +/* tok_list_struct - collect structured type info within list */ + +static void tok_list_struct(list, t) +register struct token *list; +register struct token *t; +{ + char *filename; + int lineno; + + /* + * Save context of '{' for diagnostics. This routine is called by the one + * that collects list members. If the '}' is not found, the list + * collector will not see the closing ')' either. + */ + + filename = t->path; + lineno = t->line; + + tok_list_append(list, t); + + /* + * Collect tokens until the matching '}' is found. Try to preserve as + * much whitespace as possible. Newlines are discarded so that they do + * not interfere when rewriting argument lists. The output routines will + * regenerate discarded newlines. 
+ */ + + while (t = tok_class()) { + switch (t->tokno) { + case TOK_WSPACE: /* preserve trailing blanks */ + tok_space_append(list->tail, t); /* except after list */ + break; + case '\n': /* fix newlines later */ + tok_free(t); + break; + case '{': /* recurse */ + tok_list_struct(list, t); + break; + case '}': /* done */ + tok_list_append(list, t); + return; + default: /* other */ + tok_list_append(list, t); + break; + } + } + error_where(filename, lineno, "unmatched '{'"); +} + +/* tok_strcat - concatenate multiple string constants */ + +static void tok_strcat(t1) +register struct token *t1; +{ + register struct token *t2; + register struct token *lookahead = 0; + + /* + * Read ahead past whitespace, comments and newlines. If we find a string + * token, concatenate it with the previous one and push back the + * intervening tokens (thus preserving as much information as possible). + * If we find something else, push back all lookahead tokens. + */ + +#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } + + while (t2 = tok_class()) { + switch (t2->tokno) { + case TOK_WSPACE: /* read past comments/blanks */ + case '\n': /* read past newlines */ + TOK_PREPEND(lookahead, t2); + break; + case '"': /* concatenate string tokens */ + if (vs_strcpy(t1->vstr, + t1->vstr->str + strlen(t1->vstr->str) - 1, + t2->vstr->str + 1) == 0) + fatal("out of memory"); + tok_free(t2); + PUSHBACK_AND_RETURN; + default: /* something else, push back */ + tok_unget(t2); + PUSHBACK_AND_RETURN; + } + } + PUSHBACK_AND_RETURN; /* hit EOF */ +} + +#if defined(MAP_VOID_STAR) || defined(MAP_VOID) + +/* tok_void - support for compilers that have problems with "void" */ + +static void tok_void(t) +register struct token *t; +{ + register struct token *t2; + register struct token *lookahead = 0; + + /* + * Look ahead beyond whitespace, comments and newlines until we see a '*' + * token. If one is found, replace "void" by "char". 
If we find something + * else, and if "void" should always be mapped, replace "void" by "int". + * Always push back the lookahead tokens. + * + * XXX The code also replaces the (void) argument list; this must be + * accounted for later on. The alternative would be to add (in unproto.c) + * TOK_VOID cases all over the place and that would be too error-prone. + */ + +#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } + + while (t2 = tok_class()) { + switch (TOK_PREPEND(lookahead, t2)->tokno) { + case TOK_WSPACE: /* read past comments/blanks */ + case '\n': /* read past newline */ + break; + case '*': /* "void *" -> "char *" */ + if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0) + fatal("out of memory"); + PUSHBACK_AND_RETURN; + default: +#ifdef MAP_VOID /* plain "void" -> "int" */ + if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0) + fatal("out of memory"); +#endif + PUSHBACK_AND_RETURN; + } + } + PUSHBACK_AND_RETURN; /* hit EOF */ +} + +#endif + +/* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */ + +static void tok_time(t) +struct token *t; +{ + long now; + char *cp; + char buf[BUFSIZ]; + + /* + * Using sprintf() to select parts of a string is gross, but this should + * be fast enough. + */ + + (void) time(&now); + cp = ctime(&now); + sprintf(buf, "\"%.8s\"", cp + 11); + if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) + fatal("out of memory"); + t->tokno = buf[0]; +} + +/* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */ + +static void tok_date(t) +struct token *t; +{ + long now; + char *cp; + char buf[BUFSIZ]; + + /* + * Using sprintf() to select parts of a string is gross, but this should + * be fast enough. 
+ */ + + (void) time(&now); + cp = ctime(&now); + sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20); + if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) + fatal("out of memory"); + t->tokno = buf[0]; +} + +/* tok_unget - push back one or more possibly composite tokens */ + +void tok_unget(t) +register struct token *t; +{ + register struct token *next; + + do { + next = t->next; + TOK_PREPEND(tok_buf, t); + } while (t = next); +} + +/* tok_list_append - append data to list */ + +static void tok_list_append(h, t) +struct token *h; +struct token *t; +{ + if (h->head == 0) { + h->head = h->tail = t; + } else { + h->tail->next = t; + h->tail = t; + } +} diff --git a/tok_io.c b/tok_io.c new file mode 100644 index 0000000..74ae6bc --- /dev/null +++ b/tok_io.c @@ -0,0 +1,612 @@ +/*++ +/* NAME +/* tok_io 3 +/* SUMMARY +/* token I/O +/* PACKAGE +/* unproto +/* SYNOPSIS +/* #include "token.h" +/* +/* struct token *tok_get() +/* +/* void tok_flush(t) +/* struct token *t; +/* +/* void tok_show(t) +/* struct token *t; +/* +/* void tok_show_ch(t) +/* struct token *t; +/* +/* void put_str(s) +/* char *s; +/* +/* void put_ch(c) +/* int c; +/* +/* void put_nl() +/* +/* char *in_path; +/* int in_line; +/* DESCRIPTION +/* These functions read from stdin and write to stdout. The +/* tokenizer keeps track of where the token appeared in the input +/* stream; on output, this information is used to preserve correct +/* line number information (even after lots of token lookahead or +/* after function-header rewriting) so that diagnostics from the +/* next compiler stage make sense. +/* +/* tok_get() reads the next token from standard input. It returns +/* a null pointer when the end of input is reached. +/* +/* tok_show() displays the contents of a (possibly composite) token +/* on the standard output. +/* +/* tok_show_ch() displays the contents of a single-character token +/* on the standard output. The character should not be a newline. 
+/* +/* tok_flush() displays the contents of a (possibly composite) token +/* on the standard output and makes it available for re-use. +/* +/* put_str() writes a null-terminated string to standard output. +/* There should be no newline characters in the string argument. +/* +/* put_ch() writes one character to standard output. The character +/* should not be a newline. +/* +/* put_nl() outputs a newline character and adjusts the program's idea of +/* the current output line. +/* +/* The in_path and in_line variables contain the file name and +/* line number of the most recently read token. +/* BUGS +/* The tokenizer is just good enough for the unproto filter. +/* As a benefit, it is quite fast. +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/01/15 21:52:59 +/* VERSION/RELEASE +/* 1.3 +/*--*/ + +static char io_sccsid[] = "@(#) tok_io.c 1.3 92/01/15 21:52:59"; + +/* C library */ + +#include <stdio.h> +#include <ctype.h> + +extern char *strchr(); +extern char *malloc(); +extern char *realloc(); +extern char *strcpy(); + +/* Application-specific stuff */ + +#include "token.h" +#include "vstring.h" +#include "error.h" + +extern char *strsave(); /* XXX need include file */ + +/* Stuff to keep track of original source file name and position */ + +static char def_path[] = ""; /* default path name */ + +char *in_path = def_path; /* current input file name */ +int in_line = 1; /* current input line number */ + +static char *out_path = def_path; /* last name in output line control */ +static int out_line = 1; /* current output line number */ +int last_ch; /* type of last output */ + +/* Forward declarations */ + +static int read_quoted(); +static void read_comment(); +static int backslash_newline(); +static char *read_hex(); +static char *read_octal(); +static void fix_line_control(); + + /* + * Character 
input with one level of pushback. The INPUT() macro recursively + * strips backslash-newline pairs from the input stream. The UNPUT() macro + * should be used only for characters obtained through the INPUT() macro. + * + * After skipping a backslash-newline pair, the input line counter is not + * updated, and we continue with the same logical source line. We just + * update a counter with the number of backslash-newline sequences that must + * be accounted for (backslash_newline() updates the counter). At the end of + * the logical source line, an appropriate number of newline characters is + * pushed back (in tok_get()). I do not know how GCC handles this, but it + * seems to produce the same output. + * + * Because backslash_newline() recursively calls itself (through the INPUT() + * macro), we will run out of stack space, given a sufficiently long + * sequence of backslash-newline pairs. + */ + +static char in_char = 0; /* push-back storage */ +static int in_flag = 0; /* pushback available */ +static int nl_compensate = 0; /* line continuation kluge */ + +#define INPUT(c) (in_flag ? (in_flag = 0, c = in_char) : \ + (c = getchar()) != '\\' ? c : \ + (c = getchar()) != '\n' ? (ungetc(c, stdin), c = '\\') : \ + (c = backslash_newline())) +#define UNPUT(c) (in_flag = 1, in_char = c) + +/* Directives that should be ignored. */ + +#ifdef IGNORE_DIRECTIVES + +static char *ignore_directives[] = { + IGNORE_DIRECTIVES, + 0, +}; + +#endif + +/* Modified string and ctype stuff. 
*/ + +#define STREQUAL(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0) + +#define ISALNUM(c) (isalnum(c) || (c) == '_') +#define ISALPHA(c) (isalpha(c) || (c) == '_') +#define ISSPACE(c) (isspace(c) && c != '\n') +#define ISDOT(c) (c == '.') +#define ISHEX(c) (isdigit(c) || strchr("abcdefABCDEF", c) != 0) +#define ISOCTAL(c) (isdigit(c) && (c) != '8' && (c) != '9') + +/* Collect all characters that satisfy one condition */ + +#define COLLECT(v,c,cond) { \ + register struct vstring *vs = v; \ + register char *cp = vs->str; \ + *cp++ = c; \ + while (INPUT(c) != EOF) { \ + if (cond) { \ + if (VS_ADDCH(vs, cp, c) == 0) \ + fatal("out of memory"); \ + } else { \ + UNPUT(c); \ + break; \ + } \ + } \ + *cp = 0; \ + } + +/* Ensure that output line information is correct */ + +#define CHECK_LINE_CONTROL(p,l) { if (out_path != (p) || out_line != (l)) \ + fix_line_control((p),(l)); } + +/* do_control - parse control line */ + +static int do_control() +{ + struct token *t; + int line; + char *path; + + /* Make sure that the directive shows up in the right place. */ + + CHECK_LINE_CONTROL(in_path, in_line); + + while (t = tok_get()) { + switch (t->tokno) { + + case TOK_WSPACE: + /* Ignore blanks after "#" token. */ + tok_free(t); + break; + + case TOK_NUMBER: + + /* + * Line control is of the form: number pathname junk. Since we + * have no idea what junk the preprocessor may generate, we copy + * all line control tokens to stdout. + */ + + put_str("# "); + line = atoi(t->vstr->str); /* extract line number */ + tok_flush(t); + while ((t = tok_get()) && t->tokno == TOK_WSPACE) + tok_flush(t); /* copy white space */ + if (t) { /* extract path name */ + path = (t->tokno == '"') ? 
strsave(t->vstr->str) : in_path; + do { + tok_flush(t); /* copy until newline */ + } while (t->tokno != '\n' && (t = tok_get())); + } + out_line = in_line = line; /* synchronize */ + out_path = in_path = path; /* synchronize */ + return; + +#ifdef IGNORE_DIRECTIVES + + case TOK_WORD: + + /* + * Optionally ignore other #directives. This is only a partial + * solution, because the preprocessor will still see them. + */ + { + char **cpp; + char *cp = t->vstr->str; + + for (cpp = ignore_directives; *cpp; cpp++) { + if (STREQUAL(cp, *cpp)) { + do { + tok_free(t); + } while (t->tokno != '\n' && (t = tok_get())); + return; + } + } + } + /* FALLTHROUGH */ +#endif + default: + /* Pass through. */ + put_ch('#'); + do { + tok_flush(t); + } while (t->tokno != '\n' && (t = tok_get())); + return; + + case 0: + /* Hit EOF, punt. */ + put_ch('#'); + return; + } + } +} + +/* backslash_newline - fix up things after reading a backslash-newline pair */ + +static int backslash_newline() +{ + register int c; + + nl_compensate++; + return (INPUT(c)); +} + +/* tok_get - get next token */ + +static int last_tokno = '\n'; + +struct token *tok_get() +{ + register struct token *t; + register int c; + int d; + + /* + * Get one from the pool and fill it in. The loop is here in case we hit + * a preprocessor control line, which happens in a minority of all cases. + * We update the token input path and line info *after* backslash-newline + * processing or the newline compensation would go wrong. 
+ */ + + t = tok_alloc(); + + for (;;) { + if ((INPUT(c)) == EOF) { + tok_free(t); + return (0); + } else if ((t->line = in_line, t->path = in_path), !isascii(c)) { + t->vstr->str[0] = c; + t->vstr->str[1] = 0; + t->tokno = TOK_OTHER; + break; + } else if (ISSPACE(c)) { + COLLECT(t->vstr, c, ISSPACE(c)); + t->tokno = TOK_WSPACE; + break; + } else if (ISALPHA(c)) { + COLLECT(t->vstr, c, ISALNUM(c)); + t->tokno = TOK_WORD; + break; + } else if (isdigit(c)) { + COLLECT(t->vstr, c, isdigit(c)); + t->tokno = TOK_NUMBER; + break; + } else if (c == '"' || c == '\'') { + t->tokno = read_quoted(t->vstr, c); /* detect missing end quote */ + break; + } else if (ISDOT(c)) { + COLLECT(t->vstr, c, ISDOT(c)); + t->tokno = TOK_OTHER; + break; + } else if (c == '#' && last_tokno == '\n') { + do_control(); + continue; + } else { + t->vstr->str[0] = c; + if (c == '\n') { + in_line++; + if (nl_compensate > 0) { /* compensation for bs-nl */ + UNPUT('\n'); + nl_compensate--; + } + } else if (c == '/') { + if ((INPUT(d)) == '*') { + t->vstr->str[1] = d; /* comment */ + read_comment(t->vstr); + t->tokno = TOK_WSPACE; + break; + } else { + if (d != EOF) + UNPUT(d); + } + } else if (c == '\\') { + t->vstr->str[1] = (INPUT(c) == EOF ? 0 : c); + t->vstr->str[2] = 0; + t->tokno = TOK_OTHER; + break; + } + t->vstr->str[1] = 0; + t->tokno = c; + break; + } + } + last_tokno = t->tokno; + t->end_line = in_line; + return (t); +} + +/* read_quoted - read string or character literal, canonicalize escapes */ + +static int read_quoted(vs, ch) +register struct vstring *vs; +int ch; +{ + register char *cp = vs->str; + register int c; + int ret = TOK_OTHER; + + *cp++ = ch; + + /* + * Clobber the token type in case of a premature newline or EOF. This + * prevents us from attempting to concatenate string constants with + * broken ones that have no closing quote. 
+ */ + + while (INPUT(c) != EOF) { + if (c == '\n') { /* newline in string */ + UNPUT(c); + break; + } + if (VS_ADDCH(vs, cp, c) == 0) /* store character */ + fatal("out of memory"); + if (c == ch) { /* closing quote */ + ret = c; + break; + } + if (c == '\\') { /* parse escape sequence */ + if ((INPUT(c)) == EOF) { /* EOF, punt */ + break; + } else if (c == 'a') { /* \a -> audible bell */ + if ((cp = vs_strcpy(vs, cp, BELL)) == 0) + fatal("out of memory"); + } else if (c == 'x') { /* \xhh -> \nnn */ + cp = read_hex(vs, cp); + } else if (ISOCTAL(c) && ch != '\'') { + cp = read_octal(vs, cp, c); /* canonicalize \octal */ + } else { + if (VS_ADDCH(vs, cp, c) == 0) /* \other: leave alone */ + fatal("out of memory"); + } + } + } + *cp = 0; + return (ret); +} + +/* read_comment - stuff a whole comment into one huge token */ + +static void read_comment(vs) +register struct vstring *vs; +{ + register char *cp = vs->str + 2; /* skip slash star */ + register int c; + register int d; + + while (INPUT(c) != EOF) { + if (VS_ADDCH(vs, cp, c) == 0) + fatal("out of memory"); + if (c == '*') { + if ((INPUT(d)) == '/') { + if (VS_ADDCH(vs, cp, d) == 0) + fatal("out of memory"); + break; + } else { + if (d != EOF) + UNPUT(d); + } + } else if (c == '\n') { + in_line++; + } else if (c == '\\') { + if ((INPUT(d)) != EOF && VS_ADDCH(vs, cp, d) == 0) + fatal("out of memory"); + } + } + *cp = 0; +} + +/* read_hex - rewrite hex escape to three-digit octal escape */ + +static char *read_hex(vs, cp) +struct vstring *vs; +register char *cp; +{ + register int c; + register int i; + char buf[BUFSIZ]; + int len; + unsigned val; + + /* + * Eat up all subsequent hex digits. Complain later when there are too + * many. + */ + + for (i = 0; i < sizeof(buf) && (INPUT(c) != EOF) && ISHEX(c); i++) + buf[i] = c; + buf[i] = 0; + + if (i < sizeof(buf) && c) + UNPUT(c); + + /* + * Convert hex form to three-digit octal form. 
The three-digit form is + * used so that strings can be concatenated without problems. Complain + * about malformed input; truncate the result to at most three octal + * digits. + */ + + if (i == 0) { + error("\\x escape sequence without hexadecimal digits"); + if (VS_ADDCH(vs, cp, 'x') == 0) + fatal("out of memory"); + } else { + (void) sscanf(buf, "%x", &val); + sprintf(buf, "%03o", val); + if ((len = strlen(buf)) > 3) + error("\\x escape sequence yields non-character value"); + if ((cp = vs_strcpy(vs, cp, buf + len - 3)) == 0) + fatal("out of memory"); + } + return (cp); +} + +/* read_octal - convert octal escape to three-digit format */ + +static char obuf[] = "00123"; + +static char *read_octal(vs, cp, c) +register struct vstring *vs; +register char *cp; +register int c; +{ + register int i; + +#define buf_input (obuf + 2) + + /* Eat up at most three octal digits. */ + + buf_input[0] = c; + for (i = 1; i < 3 && (INPUT(c) != EOF) && ISOCTAL(c); i++) + buf_input[i] = c; + buf_input[i] = 0; + + if (i < 3 && c) + UNPUT(c); + + /* + * Leave three-digit octal escapes alone. Convert one-digit and two-digit + * octal escapes to three-digit form by prefixing them with a suitable + * number of '0' characters. This is done so that strings can be + * concatenated without problems. + */ + + if ((cp = vs_strcpy(vs, cp, buf_input + i - 3)) == 0) + fatal("out of memory"); + return (cp); +} + +/* put_nl - emit newline and adjust output line count */ + +void put_nl() +{ + put_ch('\n'); + out_line++; +} + +/* fix_line_control - to adjust path and/or line count info in output */ + +static void fix_line_control(path, line) +register char *path; +register int line; +{ + + /* + * This function is called sporadically, so it should not be a problem + * that we repeat some of the tests that preceded this function call. + * + * Emit a newline if we are not at the start of a line. + * + * If we switch files, or if we jump backwards, emit line control. 
If we + * jump forward, emit the proper number of newlines to compensate. + */ + + if (last_ch != '\n') /* terminate open line */ + put_nl(); + if (path != out_path || line < out_line) { /* file switch or back jump */ + printf("# %d %s\n", out_line = line, out_path = path); + last_ch = '\n'; + } else { /* forward jump */ + while (line > out_line) + put_nl(); + } +} + +/* tok_show_ch - output single-character token (not newline) */ + +void tok_show_ch(t) +register struct token *t; +{ + CHECK_LINE_CONTROL(t->path, t->line); + + put_ch(t->tokno); /* show token contents */ +} + +/* tok_show - output (possibly composite) token */ + +void tok_show(t) +register struct token *t; +{ + register struct token *p; + + if (t->tokno == TOK_LIST) { + register struct token *s; + + /* + * This branch is completely in terms of tok_xxx() primitives, so + * there is no need to check the line control information. + */ + + for (s = t->head; s; s = s->next) { + tok_show_ch(s); /* '(' or ',' or ')' */ + for (p = s->head; p; p = p->next) + tok_show(p); /* show list element */ + } + } else { + register char *cp = t->vstr->str; + + /* + * Measurements show that it pays off to give special treatment to + * single-character tokens. Note that both types of token may cause a + * change of output line number. 
+ */ + + CHECK_LINE_CONTROL(t->path, t->line); + if (cp[1] == 0) { + put_ch(*cp); /* single-character token */ + } else { + put_str(cp); /* multi_character token */ + } + out_line = t->end_line; /* may span multiple lines */ + for (p = t->head; p; p = p->next) + tok_show(p); /* trailing blanks */ + } +} diff --git a/tok_pool.c b/tok_pool.c new file mode 100644 index 0000000..e2ed107 --- /dev/null +++ b/tok_pool.c @@ -0,0 +1,103 @@ +/*++ +/* NAME +/* tok_pool 3 +/* SUMMARY +/* maintain pool of unused token structures +/* PACKAGE +/* unproto +/* SYNOPSIS +/* #include "token.h" +/* +/* struct token *tok_alloc() +/* +/* void tok_free(t) +/* struct token *t; +/* DESCRIPTION +/* tok_alloc() and tok_free() maintain a pool of unused token +/* structures. +/* +/* tok_alloc() takes the first free token structure from the pool +/* or allocates a new one if the pool is empty. +/* +/* tok_free() adds a (possibly composite) token structure to the pool. +/* BUGS +/* The pool never shrinks. +/* AUTHOR(S) +/* Wietse Venema +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 92/01/15 21:53:04 +/* VERSION/RELEASE +/* 1.2 +/*--*/ + +static char pool_sccsid[] = "@(#) tok_pool.c 1.2 92/01/15 21:53:04"; + +/* C library */ + +extern char *malloc(); + +/* Application-specific stuff */ + +#include "token.h" +#include "vstring.h" +#include "error.h" + +#define TOKLEN 5 /* initial string buffer length */ + +struct token *tok_pool = 0; /* free token pool */ + +/* tok_alloc - allocate token structure from pool or heap */ + +struct token *tok_alloc() +{ + register struct token *t; + + if (tok_pool) { /* re-use an old one */ + t = tok_pool; + tok_pool = t->next; + } else { /* create a new one */ + if ((t = (struct token *) malloc(sizeof(struct token))) == 0 + || (t->vstr = vs_alloc(TOKLEN)) == 0) + fatal("out of memory"); + } + t->next = t->head = t->tail = 0; +#ifdef DEBUG + strcpy(t->vstr->str, "BUSY"); +#endif + return (t); +} + +/* tok_free - return (possibly composite) token to pool of free tokens */ + +void tok_free(t) +register struct token *t; +{ +#ifdef DEBUG + /* Check if we are freeing free token */ + + register struct token *p; + + for (p = tok_pool; p; p = p->next) + if (p == t) + fatal("freeing free token"); +#endif + + /* Free neighbours and subordinates first */ + + if (t->next) + tok_free(t->next); + if (t->head) + tok_free(t->head); + + /* Free self */ + + t->next = tok_pool; + t->head = t->tail = 0; + tok_pool = t; +#ifdef DEBUG + strcpy(t->vstr->str, "FREE"); +#endif +} @@ -0,0 +1,55 @@ +/* @(#) token.h 1.4 92/01/15 21:53:17 */ + +struct token { + int tokno; /* token value, see below */ + char *path; /* file name */ + int line; /* line number at token start */ + int end_line; /* line number at token end */ + struct vstring *vstr; /* token contents */ + struct token *next; + struct token *head; + struct token *tail; +}; + +/* Special token values */ + +#define TOK_LIST 256 /* () delimited list */ +#define TOK_WORD 257 /* keyword or identifier */ +#define 
TOK_NUMBER 258 /* one or more digits */ +#define TOK_WSPACE 259 /* comment, white space, not newline */ +#define TOK_OTHER 260 /* other token */ +#define TOK_CONTROL 261 /* flow control keyword */ +#define TOK_COMPOSITE 262 /* struct or union keyword */ +#define TOK_DATE 263 /* date: Mmm dd yyyy */ +#define TOK_TIME 264 /* time: hh:mm:ss */ +#define TOK_VOID 265 /* void keyword */ + +/* Input/output functions and macros */ + +extern struct token *tok_get(); /* read next single token */ +extern void tok_show(); /* display (composite) token */ +extern struct token *tok_class(); /* classify tokens */ +extern void tok_unget(); /* stuff token back into input */ +extern void put_nl(); /* print newline character */ +extern void tok_show_ch(); /* emit single-character token */ + +#define tok_flush(t) (tok_show(t), tok_free(t)) + +#ifdef DEBUG +#define put_ch(c) (putchar(last_ch = c),fflush(stdout)) +#define put_str(s) (fputs(s,stdout),last_ch = 0,fflush(stdout)) +#else +#define put_ch(c) putchar(last_ch = c) +#define put_str(s) (fputs(s,stdout),last_ch = 0) +#endif + +/* Memory management */ + +struct token *tok_alloc(); /* allocate token storage */ +extern void tok_free(); /* re-cycle storage */ + +/* Context */ + +extern char *in_path; /* current input path name */ +extern int in_line; /* current input line number */ +extern int last_ch; /* type of last output */ diff --git a/unproto.1 b/unproto.1 new file mode 100644 index 0000000..31490c3 --- /dev/null +++ b/unproto.1 @@ -0,0 +1,152 @@ +.TH UNPROTO 1 +.ad +.fi +.SH NAME +unproto +\- +compile ANSI C with traditional UNIX C compiler +.SH PACKAGE +.na +.nf +unproto +.SH SYNOPSIS +.na +.nf +/somewhere/cpp ... + +cc cflags -E file.c | unproto >file.i; cc cflags -c file.i +.SH DESCRIPTION +.ad +.fi +This document describes a filter that sits in between the UNIX +C preprocessor and the next UNIX C compiler stage, on the fly rewriting +ANSI-style syntax to old-style syntax. 
Typically, the program is +invoked by the native UNIX C compiler as an alternate preprocessor. +The unprototyper in turn invokes the native C preprocessor and +massages its output. Similar tricks can be used with the lint(1) +command. + +Language constructs that are always rewritten: +.TP +function headings, prototypes, pointer types +ANSI-C style function headings, function prototypes, function +pointer types and type casts are rewritten to old style. +<stdarg.h> support is provided for functions with variable-length +argument lists. +.TP +character and string constants +The \\a and \\x escape sequences are rewritten to their (three-digit) +octal equivalents. + +Multiple string tokens are concatenated; an arbitrary number of +whitespace or comment tokens may appear between successive +string tokens. + +Within string constants, octal escape sequences are rewritten to the +three-digit \\ddd form, so that string concatenation produces correct +results. +.TP +date and time +The __DATE__ and __TIME__ tokens are replaced by string constants +of the form "Mmm dd yyyy" and "hh:mm:ss", respectively. The result +is subjected to string concatenation, just like any other string +constant. +.PP +Language constructs that are rewritten only if the program has been +configured to do so: +.TP +void types +The unprototyper can be configured to rewrite "void *" to "char *", +and even to rewrite plain "void" to "int". +These features are configurable because many traditional UNIX C +compilers do not need them. + +Note: (void) argument lists are always replaced by empty ones. +.PP +ANSI C constructs that are not rewritten because the traditional +UNIX C preprocessor provides suitable workarounds: +.TP +const and volatile +Use the "-Dconst=" and/or "-Dvolatile=" preprocessor directives to +get rid of unimplemented keywords. +.TP +token pasting and stringizing +The traditional UNIX C preprocessor provides excellent alternatives. 
+For example: + +.nf +.ne 2 +#define string(bar) "bar" /* instead of: # x */ +#define paste(x,y) x/**\/y /* instead of: x##y */ +.fi + +There is a good reason why the # and ## operators are not implemented +in the unprototyper. +After program text has gone through a non-ANSI C preprocessor, all +information about the grouping of the operands of # and ## is lost. +Thus, if the unprototyper were to perform these operations, it would +produce correct results only in the most trivial cases. Operands +with embedded blanks, operands that expand to null tokens, and nested +use of # and/or ## would cause all kinds of obscure problems. +.PP +Unsupported ANSI features: +.TP +trigraphs and #pragmas +Trigraphs are useful only for systems with broken character sets. +If the local compiler chokes on #pragma, insert a blank before the +"#" character, and enclose the offending directive between #ifdef +and #endif. +.SH SEE ALSO +.na +.nf +.ad +.fi +cc(1), how to specify a non-default C preprocessor. +Some versions of the lint(1) command are implemented as a shell +script. It should require only minor modification for integration +with the unprototyper. Other versions of the lint(1) command accept +the same command syntax as the C compiler for the specification of a +non-default preprocessor. Some research may be needed. +.SH FILES +.na +.nf +/wherever/stdarg.h, provided with the unproto filter. +.SH DIAGNOSTICS +.ad +.fi +Problems are reported on the standard error stream. +A non-zero exit status means that there was a problem. +.SH BUGS +.ad +.fi +The unprototyper should be run on preprocessed source only: +unexpanded macros may confuse the program. + +Declarations of (object) are misunderstood and will result in +syntax errors: the objects between parentheses disappear. + +Sometimes does not preserve whitespace after parentheses and commas. +This is a purely aesthetical matter, and the compiler should not care. +Whitespace within string constants is, of course, left intact. 
+ +Does not generate explicit type casts for function-argument +expressions. The lack of explicit conversions between integral +and/or pointer argument types should not be a problem in environments +where sizeof(int) == sizeof(long) == sizeof(pointer). A more serious +problem is the lack of automatic type conversions between integral and +floating-point argument types. Let lint(1) be your friend. +.SH AUTHOR(S) +.na +.nf +Wietse Venema (wietse@wzv.win.tue.nl) +Eindhoven University of Technology +Department of Mathematics and Computer Science +Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +.SH LAST MODIFICATION +.na +.nf +93/06/18 22:29:37 +.SH VERSION/RELEASE +.na +.nf +1.6 diff --git a/unproto.c b/unproto.c new file mode 100644 index 0000000..2b2e764 --- /dev/null +++ b/unproto.c @@ -0,0 +1,999 @@ +/*++ +/* NAME +/* unproto 1 +/* SUMMARY +/* compile ANSI C with traditional UNIX C compiler +/* PACKAGE +/* unproto +/* SYNOPSIS +/* /somewhere/cpp ... +/* +/* cc cflags -E file.c | unproto >file.i; cc cflags -c file.i +/* DESCRIPTION +/* This document describes a filter that sits in between the UNIX +/* C preprocessor and the next UNIX C compiler stage, on the fly rewriting +/* ANSI-style syntax to old-style syntax. Typically, the program is +/* invoked by the native UNIX C compiler as an alternate preprocessor. +/* The unprototyper in turn invokes the native C preprocessor and +/* massages its output. Similar tricks can be used with the lint(1) +/* command. +/* +/* Language constructs that are always rewritten: +/* .TP +/* function headings, prototypes, pointer types +/* ANSI-C style function headings, function prototypes, function +/* pointer types and type casts are rewritten to old style. +/* <stdarg.h> support is provided for functions with variable-length +/* argument lists. +/* .TP +/* character and string constants +/* The \\a and \\x escape sequences are rewritten to their (three-digit) +/* octal equivalents. 
+/* +/* Multiple string tokens are concatenated; an arbitrary number of +/* whitespace or comment tokens may appear between successive +/* string tokens. +/* +/* Within string constants, octal escape sequences are rewritten to the +/* three-digit \\ddd form, so that string concatenation produces correct +/* results. +/* .TP +/* date and time +/* The __DATE__ and __TIME__ tokens are replaced by string constants +/* of the form "Mmm dd yyyy" and "hh:mm:ss", respectively. The result +/* is subjected to string concatenation, just like any other string +/* constant. +/* .PP +/* Language constructs that are rewritten only if the program has been +/* configured to do so: +/* .TP +/* void types +/* The unprototyper can be configured to rewrite "void *" to "char *", +/* and even to rewrite plain "void" to "int". +/* These features are configurable because many traditional UNIX C +/* compilers do not need them. +/* +/* Note: (void) argument lists are always replaced by empty ones. +/* .PP +/* ANSI C constructs that are not rewritten because the traditional +/* UNIX C preprocessor provides suitable workarounds: +/* .TP +/* const and volatile +/* Use the "-Dconst=" and/or "-Dvolatile=" preprocessor directives to +/* get rid of unimplemented keywords. +/* .TP +/* token pasting and stringizing +/* The traditional UNIX C preprocessor provides excellent alternatives. +/* For example: +/* +/* .nf +/* .ne 2 +/* #define string(bar) "bar" /* instead of: # x */ +/* #define paste(x,y) x/**\/y /* instead of: x##y */ +/* .fi +/* +/* There is a good reason why the # and ## operators are not implemented +/* in the unprototyper. +/* After program text has gone through a non-ANSI C preprocessor, all +/* information about the grouping of the operands of # and ## is lost. +/* Thus, if the unprototyper were to perform these operations, it would +/* produce correct results only in the most trivial cases. 
Operands +/* with embedded blanks, operands that expand to null tokens, and nested +/* use of # and/or ## would cause all kinds of obscure problems. +/* .PP +/* Unsupported ANSI features: +/* .TP +/* trigraphs and #pragmas +/* Trigraphs are useful only for systems with broken character sets. +/* If the local compiler chokes on #pragma, insert a blank before the +/* "#" character, and enclose the offending directive between #ifdef +/* and #endif. +/* SEE ALSO +/* .ad +/* .fi +/* cc(1), how to specify a non-default C preprocessor. +/* Some versions of the lint(1) command are implemented as a shell +/* script. It should require only minor modification for integration +/* with the unprototyper. Other versions of the lint(1) command accept +/* the same command syntax as the C compiler for the specification of a +/* non-default preprocessor. Some research may be needed. +/* FILES +/* /wherever/stdarg.h, provided with the unproto filter. +/* DIAGNOSTICS +/* Problems are reported on the standard error stream. +/* A non-zero exit status means that there was a problem. +/* BUGS +/* The unprototyper should be run on preprocessed source only: +/* unexpanded macros may confuse the program. +/* +/* Declarations of (object) are misunderstood and will result in +/* syntax errors: the objects between parentheses disappear. +/* +/* Sometimes does not preserve whitespace after parentheses and commas. +/* This is a purely aesthetical matter, and the compiler should not care. +/* Whitespace within string constants is, of course, left intact. +/* +/* Does not generate explicit type casts for function-argument +/* expressions. The lack of explicit conversions between integral +/* and/or pointer argument types should not be a problem in environments +/* where sizeof(int) == sizeof(long) == sizeof(pointer). A more serious +/* problem is the lack of automatic type conversions between integral and +/* floating-point argument types. Let lint(1) be your friend. 
+/* AUTHOR(S) +/* Wietse Venema (wietse@wzv.win.tue.nl) +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 93/06/18 22:29:37 +/* VERSION/RELEASE +/* 1.6 +/*--*/ + +static char unproto_sccsid[] = "@(#) unproto.c 1.6 93/06/18 22:29:37"; + +/* C library */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdio.h> +#include <errno.h> + +extern void exit(); +extern int optind; +extern char *optarg; +extern int getopt(); + +/* Application-specific stuff */ + +#include "vstring.h" +#include "stdarg.h" +#include "token.h" +#include "error.h" +#include "symbol.h" + +/* Forward declarations. */ + +static struct token *dcl_flush(); +static void block_flush(); +static void block_dcls(); +static struct token *show_func_ptr_type(); +static struct token *show_struct_type(); +static void show_arg_name(); +static void show_type(); +static void pair_flush(); +static void check_cast(); +static void show_empty_list(); + +#define check_cast_flush(t) (check_cast(t), tok_free(t)) + +#ifdef PIPE_THROUGH_CPP +static int pipe_stdin_through_cpp(); +#endif + +/* Disable debugging printfs while preserving side effects. */ + +#ifdef DEBUG +#define DPRINTF printf +#else +#define DPRINTF (void) +#endif + +/* An attempt to make some complicated expressions a bit more readable. */ + +#define STREQ(x,y) (*(x) == *(y) && !strcmp((x),(y))) + +#define LAST_ARG_AND_EQUAL(s,c) ((s)->next && (s)->next->next == 0 \ + && (s)->head && ((s)->head == (s)->tail) \ + && (STREQ((s)->head->vstr->str, (c)))) + +#define LIST_BEGINS_WITH_STAR(s) (s->head->head && s->head->head->tokno == '*') + +#define IS_FUNC_PTR_TYPE(s) (s->tokno == TOK_LIST && s->next \ + && s->next->tokno == TOK_LIST \ + && LIST_BEGINS_WITH_STAR(s)) + +/* What to look for to detect a (void) argument list. 
*/ + +#ifdef MAP_VOID +#define VOID_ARG "int" /* bare "void" is mapped to "int" */ +#else +#define VOID_ARG "void" /* bare "void" is left alone */ +#endif + +/* main - driver */ + +int main(argc, argv) +int argc; +char **argv; +{ + register struct token *t; +#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ + int cpp_status; + int wait_pid; + int cpp_pid; + + cpp_pid = pipe_stdin_through_cpp(argv); +#endif + + sym_init(); /* prime the symbol table */ + + while (t = tok_class()) { + if (t = dcl_flush(t)) { /* try declaration */ + if (t->tokno == '{') { /* examine rejected token */ + block_flush(t); /* body */ + } else { + tok_flush(t); /* other, recover */ + } + } + } + +#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ + while ((wait_pid = wait(&cpp_status)) != -1 && wait_pid != cpp_pid) + /* void */ ; + return (errcount != 0 || wait_pid != cpp_pid || cpp_status != 0); +#else + return (errcount != 0); +#endif +} + +#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ + +/* pipe_stdin_through_cpp - avoid shell script overhead */ + +static int pipe_stdin_through_cpp(argv) +char **argv; +{ + int pipefds[2]; + int pid; + char **cpptr = argv; + int i; + struct stat st; + + /* + * The code that sets up the pipe requires that file descriptors 0,1,2 + * are already open. All kinds of mysterious things will happen if that + * is not the case. The following loops makes sure that descriptors 0,1,2 + * are set up properly. + */ + + for (i = 0; i < 3; i++) { + if (fstat(i, &st) == -1 && open("/dev/null", 2) != i) { + perror("open /dev/null"); + exit(1); + } + } + + /* + * With most UNIX implementations, the second non-option argument to + * /lib/cpp specifies the output file. If an output file other than + * stdout is specified, we must force /lib/cpp to write to stdout, and we + * must redirect our own standard output to the specified output file. + */ + +#define IS_OPTION(cp) ((cp)[0] == '-' && (cp)[1] != 0) + + /* Skip to first non-option argument, if any. 
*/ + + while (*++cpptr && IS_OPTION(*cpptr)) + /* void */ ; + + /* + * Assume that the first non-option argument is the input file name. The + * next argument could be the output destination or an option (System V + * Release 2 /lib/cpp gets the options *after* the file arguments). + */ + + if (*cpptr && *++cpptr && **cpptr != '-') { + + /* + * The first non-option argument is followed by another argument that + * is not an option ("-stuff") or a hyphen ("-"). Redirect our own + * standard output before we clobber the file name. + */ + + if (freopen(*cpptr, "w", stdout) == 0) { + perror(*cpptr); + exit(1); + } + /* Clobber the file name argument so that /lib/cpp writes to stdout */ + + *cpptr = "-"; + } + /* Set up the pipe that connects /lib/cpp to our standard input. */ + + if (pipe(pipefds)) { + perror("pipe"); + exit(1); + } + switch (pid = fork()) { + case -1: /* error */ + perror("fork"); + exit(1); + /* NOTREACHED */ + case 0: /* child */ + (void) close(pipefds[0]); /* close reading end */ + (void) close(1); /* connect stdout to pipe */ + if (dup(pipefds[1]) != 1) + fatal("dup() problem"); + (void) close(pipefds[1]); /* close redundant fd */ + (void) execv(PIPE_THROUGH_CPP, argv); + perror(PIPE_THROUGH_CPP); + exit(1); + /* NOTREACHED */ + default: /* parent */ + (void) close(pipefds[1]); /* close writing end */ + (void) close(0); /* connect stdin to pipe */ + if (dup(pipefds[0]) != 0) + fatal("dup() problem"); + close(pipefds[0]); /* close redundant fd */ + return (pid); + } +} + +#endif + +/* show_arg_names - display function argument names */ + +static void show_arg_names(t) +register struct token *t; +{ + register struct token *s; + + /* Do argument names, but suppress void and rewrite trailing ... */ + + if (LAST_ARG_AND_EQUAL(t->head, VOID_ARG)) { + show_empty_list(t); /* no arguments */ + } else { + for (s = t->head; s; s = s->next) { /* foreach argument... 
*/ + if (LAST_ARG_AND_EQUAL(s, "...")) { +#ifdef _VA_ALIST_ /* see ./stdarg.h */ + tok_show_ch(s); /* ',' */ + put_str(_VA_ALIST_); /* varargs magic */ +#endif + } else { + tok_show_ch(s); /* '(' or ',' or ')' */ + show_arg_name(s); /* extract argument name */ + } + } + } +} + +/* show_arg_types - display function argument types */ + +static void show_arg_types(t) +register struct token *t; +{ + register struct token *s; + + /* Do argument types, but suppress void and trailing ... */ + + if (!LAST_ARG_AND_EQUAL(t->head, VOID_ARG)) { + for (s = t->head; s; s = s->next) { /* foreach argument... */ + if (LAST_ARG_AND_EQUAL(s, "...")) { +#ifdef _VA_DCL_ /* see ./stdarg.h */ + put_str(_VA_DCL_); /* varargs magic */ + put_nl(); /* make output look nicer */ +#endif + } else { + if (s->head != s->tail) { /* really new-style argument? */ + show_type(s); /* rewrite type info */ + put_ch(';'); + put_nl(); /* make output look nicer */ + } + } + } + } +} + +/* header_flush - rewrite new-style function heading to old style */ + +static void header_flush(t) +register struct token *t; +{ + show_arg_names(t); /* show argument names */ + put_nl(); /* make output look nicer */ + show_arg_types(t); /* show argument types */ + tok_free(t); /* discard token */ +} + +/* fpf_header_names - define func returning ptr to func, no argument types */ + +static void fpf_header_names(list) +struct token *list; +{ + register struct token *s; + register struct token *p; + + /* + * Recurse until we find the argument list. Account for the rare case + * that list is a comma-separated list (which should be a syntax error). + * Display old-style fuction argument names. 
+ */ + + for (s = list->head; s; s = s->next) { + tok_show_ch(s); /* '(' or ',' or ')' */ + for (p = s->head; p; p = p->next) { + if (p->tokno == TOK_LIST) { + if (IS_FUNC_PTR_TYPE(p)) { /* recurse */ + fpf_header_names(p); + show_empty_list(p = p->next); + } else { /* display argument names */ + show_arg_names(p); + } + } else { /* pass through other stuff */ + tok_show(p); + } + } + } +} + +/* fpf_header_types - define func returning ptr to func, argument types only */ + +static void fpf_header_types(list) +struct token *list; +{ + register struct token *s; + register struct token *p; + + /* + * Recurse until we find the argument list. Account for the rare case + * that list is a comma-separated list (which should be a syntax error). + * Display old-style function argument types. + */ + + for (s = list->head; s; s = s->next) { + for (p = s->head; p; p = p->next) { + if (p->tokno == TOK_LIST) { + if (IS_FUNC_PTR_TYPE(p)) { /* recurse */ + fpf_header_types(p); + p = p->next; + } else { /* display argument types */ + show_arg_types(p); + } + } + } + } +} + +/* fpf_header - define function returning pointer to function */ + +static void fpf_header(l1, l2) +struct token *l1; +struct token *l2; +{ + fpf_header_names(l1); /* strip argument types */ + show_empty_list(l2); /* strip prototype */ + put_nl(); /* nicer output */ + fpf_header_types(l1); /* show argument types */ +} + +/* skip_enclosed - skip over enclosed tokens */ + +static struct token *skip_enclosed(p, stop) +register struct token *p; +register int stop; +{ + register int start = p->tokno; + + /* Always return a pointer to the last processed token, never NULL. 
*/ + + while (p->next) { + p = p->next; + if (p->tokno == start) { + p = skip_enclosed(p, stop); /* recurse */ + } else if (p->tokno == stop) { + break; /* done */ + } + } + return (p); +} + +/* show_arg_name - extract argument name from argument type info */ + +static void show_arg_name(s) +register struct token *s; +{ + if (s->head) { + register struct token *p; + register struct token *t = 0; + + /* Find the last interesting item. */ + + for (p = s->head; p; p = p->next) { + if (p->tokno == TOK_WORD) { + t = p; /* remember last word */ + } else if (p->tokno == '{') { + p = skip_enclosed(p, '}'); /* skip structured stuff */ + } else if (p->tokno == '[') { + break; /* dimension may be a macro */ + } else if (IS_FUNC_PTR_TYPE(p)) { + t = p; /* or function pointer */ + p = p->next; + } + } + + /* Extract argument name from last interesting item. */ + + if (t) { + if (t->tokno == TOK_LIST) + show_arg_name(t->head); /* function pointer, recurse */ + else + tok_show(t); /* print last word */ + } + } +} + +/* show_type - rewrite type to old-style syntax */ + +static void show_type(s) +register struct token *s; +{ + register struct token *p; + + /* + * Rewrite (*stuff)(args) to (*stuff)(). Rewrite word(args) to word(), + * but only if the word was preceded by a word, '*' or '}'. Leave + * anything else alone. 
+ */ + + for (p = s->head; p; p = p->next) { + if (IS_FUNC_PTR_TYPE(p)) { + p = show_func_ptr_type(p, p->next); /* function pointer type */ + } else { + register struct token *q; + register struct token *r; + + tok_show(p); /* other */ + if ((p->tokno == TOK_WORD || p->tokno == '*' || p->tokno == '}') + && (q = p->next) && q->tokno == TOK_WORD + && (r = q->next) && r->tokno == TOK_LIST) { + tok_show(q); /* show name */ + show_empty_list(p = r); /* strip args */ + } + } + } +} + +/* show_func_ptr_type - display function_pointer type using old-style syntax */ + +static struct token *show_func_ptr_type(t1, t2) +struct token *t1; +struct token *t2; +{ + register struct token *s; + + /* + * Rewrite (list1) (list2) to (list1) (). Account for the rare case that + * (list1) is a comma-separated list. That should be an error, but we do + * not want to waste any information. + */ + + for (s = t1->head; s; s = s->next) { + tok_show_ch(s); /* '(' or ',' or ')' */ + show_type(s); /* recurse */ + } + show_empty_list(t2); + return (t2); +} + +/* show_empty_list - display opening and closing parentheses (if available) */ + +static void show_empty_list(t) +register struct token *t; +{ + tok_show_ch(t->head); /* opening paren */ + if (t->tail->tokno == ')') + tok_show_ch(t->tail); /* closing paren */ +} + +/* show_struct_type - display structured type, rewrite function-pointer types */ + +static struct token *show_struct_type(p) +register struct token *p; +{ + tok_show(p); /* opening brace */ + + while (p->next) { /* XXX cannot return 0 */ + p = p->next; + if (IS_FUNC_PTR_TYPE(p)) { + p = show_func_ptr_type(p, p->next); /* function-pointer member */ + } else if (p->tokno == '{') { + p = show_struct_type(p); /* recurse */ + } else { + tok_show(p); /* other */ + if (p->tokno == '}') { + return (p); /* done */ + } + } + } + DPRINTF("/* missing '}' */"); + return (p); +} + +/* is_func_ptr_cast - recognize function-pointer type cast */ + +static int is_func_ptr_cast(t) +register struct 
token *t; +{ + register struct token *p; + + /* + * Examine superficial structure. Require (list1) (list2). Require that + * list1 begins with a star. + */ + + if (!IS_FUNC_PTR_TYPE(t)) + return (0); + + /* + * Make sure that there is no name in (list1). Do not worry about + * unexpected tokens, because the compiler will complain anyway. + */ + + for (p = t->head->head; p; p = p->next) { + switch (p->tokno) { + case TOK_LIST: /* recurse */ + return (is_func_ptr_cast(p)); + case TOK_WORD: /* name in list */ + return (0); + case '[': + return (1); /* dimension may be a macro */ + } + } + return (1); /* no name found */ +} + +/* check_cast - display ()-delimited, comma-separated list */ + +static void check_cast(t) +struct token *t; +{ + register struct token *s; + register struct token *p; + + /* + * Rewrite function-pointer types and function-pointer casts. Do not + * blindly rewrite (*list1)(list2) to (*list1)(). Function argument lists + * are about the only thing we can discard without provoking diagnostics + * from the compiler. + */ + + for (s = t->head; s; s = s->next) { + tok_show_ch(s); /* '(' or ',' or ')' */ + for (p = s->head; p; p = p->next) { + switch (p->tokno) { + case TOK_LIST: + if (is_func_ptr_cast(p)) { /* not: IS_FUNC_PTR_TYPE(p) */ + p = show_func_ptr_type(p, p->next); + } else { + check_cast(p); /* recurse */ + } + break; + case '{': + p = show_struct_type(p); /* rewrite func. ptr. types */ + break; + default: + tok_show(p); + break; + } + } + } +} + +/* block_dcls - on the fly rewrite decls/initializers at start of block */ + +static void block_dcls() +{ + register struct token *t; + + /* + * Away from the top level, a declaration should be preceded by type or + * storage-class information. That is why inside blocks, structs and + * unions we insist on reading one word before passing the _next_ token + * to the dcl_flush() function. 
+ * + * Struct and union declarations look the same everywhere: we make an + * exception for these more regular constructs and pass the "struct" and + * "union" tokens to the type_dcl() function. + */ + + while (t = tok_class()) { + switch (t->tokno) { + case TOK_WSPACE: /* preserve white space */ + case '\n': /* preserve line count */ + tok_flush(t); + break; + case TOK_WORD: /* type declarations? */ + tok_flush(t); /* advance to next token */ + t = tok_class(); /* null return is ok */ + /* FALLTRHOUGH */ + case TOK_COMPOSITE: /* struct or union */ + if ((t = dcl_flush(t)) == 0) + break; + /* FALLTRHOUGH */ + default: /* end of declarations */ + DPRINTF("/* end dcls */"); + /* FALLTRHOUGH */ + case '}': /* end of block */ + tok_unget(t); + return; + } + } +} + +/* block_flush - rewrite struct, union or statement block on the fly */ + +static void block_flush(t) +register struct token *t; +{ + static int count = 0; + + tok_flush(t); + DPRINTF("/*%d*/", ++count); + + /* + * Rewrite function pointer types in declarations and function pointer + * casts in initializers at start of block. + */ + + block_dcls(); + + /* Remainder of block: only rewrite function pointer casts. 
*/ + + while (t = tok_class()) { + if (t->tokno == TOK_LIST) { + check_cast_flush(t); + } else if (t->tokno == '{') { + block_flush(t); + } else { + tok_flush(t); + if (t->tokno == '}') { + DPRINTF("/*%d*/", count--); + return; + } + } + } + DPRINTF("/* missing '}' */"); +} + +/* pair_flush - on the fly rewrite casts in grouped stuff */ + +static void pair_flush(t, start, stop) +register struct token *t; +register int start; +register int stop; +{ + tok_flush(t); + + while (t = tok_class()) { + if (t->tokno == start) { /* recurse */ + pair_flush(t, start, stop); + } else if (t->tokno == TOK_LIST) { /* expression or cast */ + check_cast_flush(t); + } else { /* other, copy */ + tok_flush(t); + if (t->tokno == stop) { /* done */ + return; + } + } + } + DPRINTF("/* missing '%c' */", stop); +} + +/* initializer - on the fly rewrite casts in initializer */ + +static void initializer() +{ + register struct token *t; + + while (t = tok_class()) { + switch (t->tokno) { + case ',': /* list separator */ + case ';': /* list terminator */ + tok_unget(t); + return; + case TOK_LIST: /* expression or cast */ + check_cast_flush(t); + break; + case '[': /* array subscript, may nest */ + pair_flush(t, '[', ']'); + break; + case '{': /* structured data, may nest */ + pair_flush(t, '{', '}'); + break; + default: /* other, just copy */ + tok_flush(t); + break; + } + } +} + +/* func_ptr_dcl_flush - rewrite function pointer stuff */ + +static struct token *func_ptr_dcl_flush(list) +register struct token *list; +{ + register struct token *t; + register struct token *t2; + + /* + * Ignore blanks and newlines because we are too lazy to maintain more + * than one token worth of lookahead. The output routines will regenerate + * discarded newline tokens. + */ + + while (t = tok_class()) { + switch (t->tokno) { + case TOK_WSPACE: + case '\n': + tok_free(t); + break; + case TOK_LIST: + /* Function pointer or function returning pointer to function. 
*/ + while ((t2 = tok_class()) /* skip blanks etc. */ + &&(t2->tokno == TOK_WSPACE || t2->tokno == '\n')) + tok_free(t2); + switch (t2 ? t2->tokno : 0) { + case '{': /* function heading (new) */ + fpf_header(list, t); + break; + case TOK_WORD: /* function heading (old) */ + tok_show(list); + tok_show(t); + break; + default: /* func pointer type */ + (void) show_func_ptr_type(list, t); + break; + } + tok_free(list); + tok_free(t); + if (t2) + tok_unget(t2); + return (0); + default: /* not a declaration */ + tok_unget(t); + return (list); + } + } + + /* Hit EOF; must be mistake, but do not waste any information. */ + + return (list); +} + +/* function_dcl_flush - rewrite function { heading, type declaration } */ + +static struct token *function_dcl_flush(list) +register struct token *list; +{ + register struct token *t; + + /* + * Ignore blanks and newlines because we are too lazy to maintain more + * than one token worth of lookahead. The output routines will regenerate + * ignored newline tokens. + */ + + while (t = tok_class()) { + switch (t->tokno) { + case TOK_WSPACE: + case '\n': + tok_free(t); + break; + case '{': + /* Function heading: word (list) { -> old style heading */ + header_flush(list); + tok_unget(t); + return (0); + case TOK_WORD: + /* Old-style function heading: word (list) word... */ + tok_flush(list); + tok_unget(t); + return (0); + case TOK_LIST: + /* Function pointer: word (list1) (list2) -> word (list1) () */ + tok_flush(list); + show_empty_list(t); + tok_free(t); + return (0); + case ',': + case ';': + /* Function type declaration: word (list) -> word () */ + show_empty_list(list); + tok_free(list); + tok_unget(t); + return (0); + default: + /* Something else, reject the list. */ + tok_unget(t); + return (list); + } + } + + /* Hit EOF; must be mistake, but do not waste any information. 
*/ + + return (list); +} + +/* dcl_flush - parse declaration on the fly, return rejected token */ + +static struct token *dcl_flush(t) +register struct token *t; +{ + register int got_word; + + /* + * Away from the top level, type or storage-class information is required + * for an (extern or forward) function type declaration or a variable + * declaration. + * + * With our naive word-counting approach, this means that the caller should + * read one word before passing the next token to us. This is how we + * distinguish, for example, function declarations from function calls. + * + * An exception are structs and unions, because they look the same at any + * level. The caller should give is the "struct" or "union" token. + */ + + for (got_word = 0; t; t = tok_class()) { + switch (t->tokno) { + case TOK_WSPACE: /* advance past blanks */ + case '\n': /* advance past newline */ + case '*': /* indirection: keep trying */ + tok_flush(t); + break; + case TOK_WORD: /* word: keep trying */ + case TOK_COMPOSITE: /* struct or union */ + got_word = 1; + tok_flush(t); + break; + default: + + /* + * Function pointer types can be preceded by zero or more words + * (at least one when not at the top level). Other stuff can be + * accepted only after we have seen at least one word (two words + * when not at the top level). See also the above comment on + * structs and unions. 
+ */ + + if (t->tokno == TOK_LIST && LIST_BEGINS_WITH_STAR(t)) { + if (t = func_ptr_dcl_flush(t)) { + return (t); /* reject token */ + } else { + got_word = 1; /* for = and [ and , and ; */ + } + } else if (got_word == 0) { + return (t); /* reject token */ + } else { + switch (t->tokno) { + case TOK_LIST: /* function type */ + if (t = function_dcl_flush(t)) + return (t); /* reject token */ + break; + case '[': /* dimension, does not nest */ + pair_flush(t, '[', ']'); + break; + case '=': /* initializer follows */ + tok_flush(t); + initializer(); /* rewrite casts */ + break; + case '{': /* struct, union, may nest */ + block_flush(t); /* use code for stmt blocks */ + break; + case ',': /* separator: keep trying */ + got_word = 0; + tok_flush(t); + break; + case ';': /* terminator: succeed */ + tok_flush(t); + return (0); + default: /* reject token */ + return (t); + } + } + } + } + return (0); /* hit EOF */ +} diff --git a/unproto5.shar b/unproto5.shar new file mode 100644 index 0000000..27093ff --- /dev/null +++ b/unproto5.shar @@ -0,0 +1,4191 @@ +#! /bin/sh +# This is a shell archive. Remove anything before this line, then unpack +# it by saving it into a file and typing "sh file". To overwrite existing +# files, type "sh file -c". You can also feed this as standard input via +# unshar, or by typing "sh <file", e.g.. If this archive is complete, you +# will see the following message at the end: +# "End of shell archive." 
+# Contents: README unproto.c tok_io.c tok_class.c tok_pool.c vstring.c +# symbol.c error.c hash.c strsave.c error.h token.h vstring.h +# symbol.h Makefile cpp.sh acc.sh stdarg.h stddef.h stdlib.h +# varargs.c example.c example.out unproto.1 +# Wrapped by wietse@wzv on Fri Jun 18 22:48:56 1993 +PATH=/bin:/usr/bin:/usr/ucb ; export PATH +if test -f README -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"README\" +else +echo shar: Extracting \"README\" \(6761 characters\) +sed "s/^X//" >README <<'END_OF_README' +X@(#) README 1.6 93/06/18 22:29:34 +X +Xunproto - Compile ANSI C with traditional UNIX C compiler +X +XDescription: +X------------ +X +XThis is a filter that sits in between the UNIX C preprocessor and the +Xnext UNIX C compiler stage, on the fly transforming ANSI C syntax to +Xold C syntax. Line number information is preserved so that compiler +Xdiagnostics still make sense. It runs at roughly the same speed as +X/lib/cpp, so it has negligible impact on compilation time. +X +XTypically, the program is invoked by the native UNIX C compiler as an +Xalternate preprocessor. The unprototyper in turn invokes the native C +Xpreprocessor and massages its output. Similar tricks can be used with +Xthe lint(1) command. Details are given below. +X +XThe filter rewrites ANSI-style function headings, function pointer +Xtypes and type casts, function prototypes, and combinations thereof. +XUnlike some other unprototypers, this one is fully recursive and does +Xnot depend on source file layout (see the example.c file). +X +XBesides the rewriting of argument lists, the program does the following +Xtransformations: string concatenation, conversion of \a and \x escape +Xsequences to their octal equivalents, translation of the __TIME__ and +X__DATE__ macros, optional mapping of `void *' to `char *', and optional +Xmapping of plain `void' to `int'. 
+X +XThe unprototyper provides hooks for compilers that require special +Xtricks for variadic functions (fortunately, many don't). <stdarg.h> +Xsupport is provided for sparc, mips, mc68k, 80x86, vax, and others. +X +XThe program has been tested with SunOS 4.1.1 (sparc), Ultrix 4.0 and +X4.2 (mips), and Microport System V Release 2 (80286). It should work +Xwith almost every PCC-based UNIX C compiler. +X +XRestrictions: +X------------- +X +XA description of restrictions and workarounds can be found in the +Xunproto.1 manual page. +X +XProblems fixed with this release: +X--------------------------------- +X +XPrototypes and definitions of functions returning pointer to function +Xwere not rewritten to old style. +X +XOperation: +X---------- +X +XThis package implements a non-default C preprocessor (the output from +Xthe default C preprocessor being piped through the unprototyper). How +Xone tells the C compiler to use a non-default preprocessor program is +Xsomewhat compiler-dependent: +X +X SunOS 4.x: cc -Qpath directory_with_alternate_cpp ... +X +X Ultrix 4.x: cc -tp -hdirectory_with_alternate_cpp -B ... +X +X System V.2: cc -Bdirectory_with_alternate_cpp/ -tp ... +X +XExamples of these, and others, can be found in the acc.sh shell script +Xthat emulates an ANSI C compiler. Your C compiler manual page should +Xprovide the necessary information. +X +XA more portable, but less efficient, approach relies on the observation +Xthat almost every UNIX C compiler supports the -E (write preprocessor +Xoutput to stdout) and -P options (preprocess file.c into file.i). Just +Xadd the following lines to your Makefiles: +X +X .c.o: +X $(CC) $(CFLAGS) -E $*.c | unproto >$*.i # simulate -P option +X $(CC) $(CFLAGS) -c $*.i +X rm -f $*.i +X +XOn some systems the lint(1) command is just a shell script, and writing +Xa version that uses the unprototyper should not be too hard. 
With SunOS +X4.x, /usr/bin/lint is not a shell script, but it does accept the same +Xsyntax as the cc(1) command for the specification of a non-default +Xcompiler pass. +X +XYou may have to do some research on the lint command provided with your +Xown machine. +X +XConfiguration: +X-------------- +X +XCheck the contents of the `stdarg.h' file provided with this package. +XThis file serves a dual purpose: (1) on systems that do not provide a +Xstdarg.h file, it should be included by C source files that implements +XANSI-style variadic functions; (2) it is also used to configure the +Xunprototyper so that it emits the proper magic when it sees `...'. +X +XThe `stdarg.h' file has support for sparc, mips, and for compilers that +Xpass arguments via the stack (typical for 80*86, mc68k and vax). It +Xgives general hints for other compilers. +X +XThe other sample header files (stddef.h and stdlib.h) are not required +Xto build the unprototyper. +X +XThe `varargs.c' file provided with this package can be used to verify +Xthat the `stdarg.h' file has been set up correctly. +X +XIf your C compiler has no hooks for an alternate preprocessor (the +Xunprototyper will be used as: `cc cflags -E file.c | unproto >file.i'), +Xbuild the `unproto' executable without the `PIPE_THROUGH_CPP' feature. +XDetails are given in the Makefile. +X +XOtherwise, the `cpp.sh' shell script can be used to set up the pipe +Xbetween the native C preprocessor and the unprototyper command. The +Xscript assumes that the unprototyper binary is called `unproto', and +Xthat it was compiled without the `PIPE_THROUGH_CPP' feature. See the +XMakefile and the `cpp.sh' script for details and for a description of +Xpossible problems with this approach. +X +XThe overhead and problems of shell-script interpretation can be avoided +Xby letting the unprototyper itself pipe its standard input through the +XC preprocessor. 
For this mode of operation, the unprototyper binary +Xshould be called `cpp', and the `unproto.c' source file should be +Xcompiled with the `PIPE_THROUGH_CPP' macro defined as the absolute +Xpathname of the native C preprocessor (usually `/lib/cpp'). See the +XMakefile for details. +X +XInstallation: +X------------- +X +XInstall the `unproto.1' manual page in a suitable place. If your system +Xdoes not provide a `stdarg.h' file, find a suitable place for the one +Xprovided with the unprototyper and install it there. The same goes for +Xthe sample stddef.h and stdlib.h files; make sure that the definitions +Xin there apply to your environment. Most or all of the latter files are +Xalready part of Ultrix 4.x and SunOS 4.1.1. +X +XThe ANSI float.h and limits.h files can be generated with the config +Xprogram by Steve Pemberton (comp.sources.misc volume 10, issue 62, +Xavailable from ftp.uu.net as comp.sources.misc/volume10/config42.Z). +X +XIf you run the unprototyper with "cc -E" just install the `unproto' +Xbinary; the `cpp' and `acc' shell scripts will not be needed. +X +XIf you use the `cpp' shell script to pipe the preprocessor output +Xthrough the unprototyper program, install the `unproto' binary in a +Xplace where the `cpp' shell script can find it, and install the `cpp' +Xshell script in a suitable place. Edit the `acc' shell script and +Xinstall it in a suitable place. From now on, type `acc' instead of +X`cc'. +X +XIf the unprototyper itself opens the pipe to the C preprocessor (i.e. +Xthe unprototyper was built with the `PIPE_THROUGH_CPP' macro defined), +Xinstall the `cpp' unprototyper binary in a suitable place. Edit the +X`acc' shell script and install it in a suitable place. From now on, +Xtype `acc' instead of `cc'. 
+X +X Wietse Venema +X wietse@wzv.win.tue.nl +X Mathematics and Computing Science +X Eindhoven University of Technology +X The Netherlands +END_OF_README +if test 6761 -ne `wc -c <README`; then + echo shar: \"README\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f unproto.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"unproto.c\" +else +echo shar: Extracting \"unproto.c\" \(27341 characters\) +sed "s/^X//" >unproto.c <<'END_OF_unproto.c' +X/*++ +X/* NAME +X/* unproto 1 +X/* SUMMARY +X/* compile ANSI C with traditional UNIX C compiler +X/* PACKAGE +X/* unproto +X/* SYNOPSIS +X/* /somewhere/cpp ... +X/* +X/* cc cflags -E file.c | unproto >file.i; cc cflags -c file.i +X/* DESCRIPTION +X/* This document describes a filter that sits in between the UNIX +X/* C preprocessor and the next UNIX C compiler stage, on the fly rewriting +X/* ANSI-style syntax to old-style syntax. Typically, the program is +X/* invoked by the native UNIX C compiler as an alternate preprocessor. +X/* The unprototyper in turn invokes the native C preprocessor and +X/* massages its output. Similar tricks can be used with the lint(1) +X/* command. +X/* +X/* Language constructs that are always rewritten: +X/* .TP +X/* function headings, prototypes, pointer types +X/* ANSI-C style function headings, function prototypes, function +X/* pointer types and type casts are rewritten to old style. +X/* <stdarg.h> support is provided for functions with variable-length +X/* argument lists. +X/* .TP +X/* character and string constants +X/* The \\a and \\x escape sequences are rewritten to their (three-digit) +X/* octal equivalents. +X/* +X/* Multiple string tokens are concatenated; an arbitrary number of +X/* whitespace or comment tokens may appear between successive +X/* string tokens. +X/* +X/* Within string constants, octal escape sequences are rewritten to the +X/* three-digit \\ddd form, so that string concatenation produces correct +X/* results. 
+X/* .TP +X/* date and time +X/* The __DATE__ and __TIME__ tokens are replaced by string constants +X/* of the form "Mmm dd yyyy" and "hh:mm:ss", respectively. The result +X/* is subjected to string concatenation, just like any other string +X/* constant. +X/* .PP +X/* Language constructs that are rewritten only if the program has been +X/* configured to do so: +X/* .TP +X/* void types +X/* The unprototyper can be configured to rewrite "void *" to "char *", +X/* and even to rewrite plain "void" to "int". +X/* These features are configurable because many traditional UNIX C +X/* compilers do not need them. +X/* +X/* Note: (void) argument lists are always replaced by empty ones. +X/* .PP +X/* ANSI C constructs that are not rewritten because the traditional +X/* UNIX C preprocessor provides suitable workarounds: +X/* .TP +X/* const and volatile +X/* Use the "-Dconst=" and/or "-Dvolatile=" preprocessor directives to +X/* get rid of unimplemented keywords. +X/* .TP +X/* token pasting and stringizing +X/* The traditional UNIX C preprocessor provides excellent alternatives. +X/* For example: +X/* +X/* .nf +X/* .ne 2 +X/* #define string(bar) "bar" /* instead of: # x */ +X/* #define paste(x,y) x/**\/y /* instead of: x##y */ +X/* .fi +X/* +X/* There is a good reason why the # and ## operators are not implemented +X/* in the unprototyper. +X/* After program text has gone through a non-ANSI C preprocessor, all +X/* information about the grouping of the operands of # and ## is lost. +X/* Thus, if the unprototyper were to perform these operations, it would +X/* produce correct results only in the most trivial cases. Operands +X/* with embedded blanks, operands that expand to null tokens, and nested +X/* use of # and/or ## would cause all kinds of obscure problems. +X/* .PP +X/* Unsupported ANSI features: +X/* .TP +X/* trigraphs and #pragmas +X/* Trigraphs are useful only for systems with broken character sets. 
+X/* If the local compiler chokes on #pragma, insert a blank before the +X/* "#" character, and enclose the offending directive between #ifdef +X/* and #endif. +X/* SEE ALSO +X/* .ad +X/* .fi +X/* cc(1), how to specify a non-default C preprocessor. +X/* Some versions of the lint(1) command are implemented as a shell +X/* script. It should require only minor modification for integration +X/* with the unprototyper. Other versions of the lint(1) command accept +X/* the same command syntax as the C compiler for the specification of a +X/* non-default preprocessor. Some research may be needed. +X/* FILES +X/* /wherever/stdarg.h, provided with the unproto filter. +X/* DIAGNOSTICS +X/* Problems are reported on the standard error stream. +X/* A non-zero exit status means that there was a problem. +X/* BUGS +X/* The unprototyper should be run on preprocessed source only: +X/* unexpanded macros may confuse the program. +X/* +X/* Declarations of (object) are misunderstood and will result in +X/* syntax errors: the objects between parentheses disappear. +X/* +X/* Sometimes does not preserve whitespace after parentheses and commas. +X/* This is a purely aesthetical matter, and the compiler should not care. +X/* Whitespace within string constants is, of course, left intact. +X/* +X/* Does not generate explicit type casts for function-argument +X/* expressions. The lack of explicit conversions between integral +X/* and/or pointer argument types should not be a problem in environments +X/* where sizeof(int) == sizeof(long) == sizeof(pointer). A more serious +X/* problem is the lack of automatic type conversions between integral and +X/* floating-point argument types. Let lint(1) be your friend. +X/* AUTHOR(S) +X/* Wietse Venema (wietse@wzv.win.tue.nl) +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 93/06/18 22:29:37 +X/* VERSION/RELEASE +X/* 1.6 +X/*--*/ +X +Xstatic char unproto_sccsid[] = "@(#) unproto.c 1.6 93/06/18 22:29:37"; +X +X/* C library */ +X +X#include <sys/types.h> +X#include <sys/stat.h> +X#include <stdio.h> +X#include <errno.h> +X +Xextern void exit(); +Xextern int optind; +Xextern char *optarg; +Xextern int getopt(); +X +X/* Application-specific stuff */ +X +X#include "vstring.h" +X#include "stdarg.h" +X#include "token.h" +X#include "error.h" +X#include "symbol.h" +X +X/* Forward declarations. */ +X +Xstatic struct token *dcl_flush(); +Xstatic void block_flush(); +Xstatic void block_dcls(); +Xstatic struct token *show_func_ptr_type(); +Xstatic struct token *show_struct_type(); +Xstatic void show_arg_name(); +Xstatic void show_type(); +Xstatic void pair_flush(); +Xstatic void check_cast(); +Xstatic void show_empty_list(); +X +X#define check_cast_flush(t) (check_cast(t), tok_free(t)) +X +X#ifdef PIPE_THROUGH_CPP +Xstatic int pipe_stdin_through_cpp(); +X#endif +X +X/* Disable debugging printfs while preserving side effects. */ +X +X#ifdef DEBUG +X#define DPRINTF printf +X#else +X#define DPRINTF (void) +X#endif +X +X/* An attempt to make some complicated expressions a bit more readable. */ +X +X#define STREQ(x,y) (*(x) == *(y) && !strcmp((x),(y))) +X +X#define LAST_ARG_AND_EQUAL(s,c) ((s)->next && (s)->next->next == 0 \ +X && (s)->head && ((s)->head == (s)->tail) \ +X && (STREQ((s)->head->vstr->str, (c)))) +X +X#define LIST_BEGINS_WITH_STAR(s) (s->head->head && s->head->head->tokno == '*') +X +X#define IS_FUNC_PTR_TYPE(s) (s->tokno == TOK_LIST && s->next \ +X && s->next->tokno == TOK_LIST \ +X && LIST_BEGINS_WITH_STAR(s)) +X +X/* What to look for to detect a (void) argument list. 
*/ +X +X#ifdef MAP_VOID +X#define VOID_ARG "int" /* bare "void" is mapped to "int" */ +X#else +X#define VOID_ARG "void" /* bare "void" is left alone */ +X#endif +X +X/* main - driver */ +X +Xint main(argc, argv) +Xint argc; +Xchar **argv; +X{ +X register struct token *t; +X#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ +X int cpp_status; +X int wait_pid; +X int cpp_pid; +X +X cpp_pid = pipe_stdin_through_cpp(argv); +X#endif +X +X sym_init(); /* prime the symbol table */ +X +X while (t = tok_class()) { +X if (t = dcl_flush(t)) { /* try declaration */ +X if (t->tokno == '{') { /* examine rejected token */ +X block_flush(t); /* body */ +X } else { +X tok_flush(t); /* other, recover */ +X } +X } +X } +X +X#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ +X while ((wait_pid = wait(&cpp_status)) != -1 && wait_pid != cpp_pid) +X /* void */ ; +X return (errcount != 0 || wait_pid != cpp_pid || cpp_status != 0); +X#else +X return (errcount != 0); +X#endif +X} +X +X#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ +X +X/* pipe_stdin_through_cpp - avoid shell script overhead */ +X +Xstatic int pipe_stdin_through_cpp(argv) +Xchar **argv; +X{ +X int pipefds[2]; +X int pid; +X char **cpptr = argv; +X int i; +X struct stat st; +X +X /* +X * The code that sets up the pipe requires that file descriptors 0,1,2 +X * are already open. All kinds of mysterious things will happen if that +X * is not the case. The following loops makes sure that descriptors 0,1,2 +X * are set up properly. +X */ +X +X for (i = 0; i < 3; i++) { +X if (fstat(i, &st) == -1 && open("/dev/null", 2) != i) { +X perror("open /dev/null"); +X exit(1); +X } +X } +X +X /* +X * With most UNIX implementations, the second non-option argument to +X * /lib/cpp specifies the output file. If an output file other than +X * stdout is specified, we must force /lib/cpp to write to stdout, and we +X * must redirect our own standard output to the specified output file. 
+X */ +X +X#define IS_OPTION(cp) ((cp)[0] == '-' && (cp)[1] != 0) +X +X /* Skip to first non-option argument, if any. */ +X +X while (*++cpptr && IS_OPTION(*cpptr)) +X /* void */ ; +X +X /* +X * Assume that the first non-option argument is the input file name. The +X * next argument could be the output destination or an option (System V +X * Release 2 /lib/cpp gets the options *after* the file arguments). +X */ +X +X if (*cpptr && *++cpptr && **cpptr != '-') { +X +X /* +X * The first non-option argument is followed by another argument that +X * is not an option ("-stuff") or a hyphen ("-"). Redirect our own +X * standard output before we clobber the file name. +X */ +X +X if (freopen(*cpptr, "w", stdout) == 0) { +X perror(*cpptr); +X exit(1); +X } +X /* Clobber the file name argument so that /lib/cpp writes to stdout */ +X +X *cpptr = "-"; +X } +X /* Set up the pipe that connects /lib/cpp to our standard input. */ +X +X if (pipe(pipefds)) { +X perror("pipe"); +X exit(1); +X } +X switch (pid = fork()) { +X case -1: /* error */ +X perror("fork"); +X exit(1); +X /* NOTREACHED */ +X case 0: /* child */ +X (void) close(pipefds[0]); /* close reading end */ +X (void) close(1); /* connect stdout to pipe */ +X if (dup(pipefds[1]) != 1) +X fatal("dup() problem"); +X (void) close(pipefds[1]); /* close redundant fd */ +X (void) execv(PIPE_THROUGH_CPP, argv); +X perror(PIPE_THROUGH_CPP); +X exit(1); +X /* NOTREACHED */ +X default: /* parent */ +X (void) close(pipefds[1]); /* close writing end */ +X (void) close(0); /* connect stdin to pipe */ +X if (dup(pipefds[0]) != 0) +X fatal("dup() problem"); +X close(pipefds[0]); /* close redundant fd */ +X return (pid); +X } +X} +X +X#endif +X +X/* show_arg_names - display function argument names */ +X +Xstatic void show_arg_names(t) +Xregister struct token *t; +X{ +X register struct token *s; +X +X /* Do argument names, but suppress void and rewrite trailing ... 
*/ +X +X if (LAST_ARG_AND_EQUAL(t->head, VOID_ARG)) { +X show_empty_list(t); /* no arguments */ +X } else { +X for (s = t->head; s; s = s->next) { /* foreach argument... */ +X if (LAST_ARG_AND_EQUAL(s, "...")) { +X#ifdef _VA_ALIST_ /* see ./stdarg.h */ +X tok_show_ch(s); /* ',' */ +X put_str(_VA_ALIST_); /* varargs magic */ +X#endif +X } else { +X tok_show_ch(s); /* '(' or ',' or ')' */ +X show_arg_name(s); /* extract argument name */ +X } +X } +X } +X} +X +X/* show_arg_types - display function argument types */ +X +Xstatic void show_arg_types(t) +Xregister struct token *t; +X{ +X register struct token *s; +X +X /* Do argument types, but suppress void and trailing ... */ +X +X if (!LAST_ARG_AND_EQUAL(t->head, VOID_ARG)) { +X for (s = t->head; s; s = s->next) { /* foreach argument... */ +X if (LAST_ARG_AND_EQUAL(s, "...")) { +X#ifdef _VA_DCL_ /* see ./stdarg.h */ +X put_str(_VA_DCL_); /* varargs magic */ +X put_nl(); /* make output look nicer */ +X#endif +X } else { +X if (s->head != s->tail) { /* really new-style argument? */ +X show_type(s); /* rewrite type info */ +X put_ch(';'); +X put_nl(); /* make output look nicer */ +X } +X } +X } +X } +X} +X +X/* header_flush - rewrite new-style function heading to old style */ +X +Xstatic void header_flush(t) +Xregister struct token *t; +X{ +X show_arg_names(t); /* show argument names */ +X put_nl(); /* make output look nicer */ +X show_arg_types(t); /* show argument types */ +X tok_free(t); /* discard token */ +X} +X +X/* fpf_header_names - define func returning ptr to func, no argument types */ +X +Xstatic void fpf_header_names(list) +Xstruct token *list; +X{ +X register struct token *s; +X register struct token *p; +X +X /* +X * Recurse until we find the argument list. Account for the rare case +X * that list is a comma-separated list (which should be a syntax error). +X * Display old-style fuction argument names. 
+X */ +X +X for (s = list->head; s; s = s->next) { +X tok_show_ch(s); /* '(' or ',' or ')' */ +X for (p = s->head; p; p = p->next) { +X if (p->tokno == TOK_LIST) { +X if (IS_FUNC_PTR_TYPE(p)) { /* recurse */ +X fpf_header_names(p); +X show_empty_list(p = p->next); +X } else { /* display argument names */ +X show_arg_names(p); +X } +X } else { /* pass through other stuff */ +X tok_show(p); +X } +X } +X } +X} +X +X/* fpf_header_types - define func returning ptr to func, argument types only */ +X +Xstatic void fpf_header_types(list) +Xstruct token *list; +X{ +X register struct token *s; +X register struct token *p; +X +X /* +X * Recurse until we find the argument list. Account for the rare case +X * that list is a comma-separated list (which should be a syntax error). +X * Display old-style function argument types. +X */ +X +X for (s = list->head; s; s = s->next) { +X for (p = s->head; p; p = p->next) { +X if (p->tokno == TOK_LIST) { +X if (IS_FUNC_PTR_TYPE(p)) { /* recurse */ +X fpf_header_types(p); +X p = p->next; +X } else { /* display argument types */ +X show_arg_types(p); +X } +X } +X } +X } +X} +X +X/* fpf_header - define function returning pointer to function */ +X +Xstatic void fpf_header(l1, l2) +Xstruct token *l1; +Xstruct token *l2; +X{ +X fpf_header_names(l1); /* strip argument types */ +X show_empty_list(l2); /* strip prototype */ +X put_nl(); /* nicer output */ +X fpf_header_types(l1); /* show argument types */ +X} +X +X/* skip_enclosed - skip over enclosed tokens */ +X +Xstatic struct token *skip_enclosed(p, stop) +Xregister struct token *p; +Xregister int stop; +X{ +X register int start = p->tokno; +X +X /* Always return a pointer to the last processed token, never NULL. 
*/ +X +X while (p->next) { +X p = p->next; +X if (p->tokno == start) { +X p = skip_enclosed(p, stop); /* recurse */ +X } else if (p->tokno == stop) { +X break; /* done */ +X } +X } +X return (p); +X} +X +X/* show_arg_name - extract argument name from argument type info */ +X +Xstatic void show_arg_name(s) +Xregister struct token *s; +X{ +X if (s->head) { +X register struct token *p; +X register struct token *t = 0; +X +X /* Find the last interesting item. */ +X +X for (p = s->head; p; p = p->next) { +X if (p->tokno == TOK_WORD) { +X t = p; /* remember last word */ +X } else if (p->tokno == '{') { +X p = skip_enclosed(p, '}'); /* skip structured stuff */ +X } else if (p->tokno == '[') { +X break; /* dimension may be a macro */ +X } else if (IS_FUNC_PTR_TYPE(p)) { +X t = p; /* or function pointer */ +X p = p->next; +X } +X } +X +X /* Extract argument name from last interesting item. */ +X +X if (t) { +X if (t->tokno == TOK_LIST) +X show_arg_name(t->head); /* function pointer, recurse */ +X else +X tok_show(t); /* print last word */ +X } +X } +X} +X +X/* show_type - rewrite type to old-style syntax */ +X +Xstatic void show_type(s) +Xregister struct token *s; +X{ +X register struct token *p; +X +X /* +X * Rewrite (*stuff)(args) to (*stuff)(). Rewrite word(args) to word(), +X * but only if the word was preceded by a word, '*' or '}'. Leave +X * anything else alone. 
+X */ +X +X for (p = s->head; p; p = p->next) { +X if (IS_FUNC_PTR_TYPE(p)) { +X p = show_func_ptr_type(p, p->next); /* function pointer type */ +X } else { +X register struct token *q; +X register struct token *r; +X +X tok_show(p); /* other */ +X if ((p->tokno == TOK_WORD || p->tokno == '*' || p->tokno == '}') +X && (q = p->next) && q->tokno == TOK_WORD +X && (r = q->next) && r->tokno == TOK_LIST) { +X tok_show(q); /* show name */ +X show_empty_list(p = r); /* strip args */ +X } +X } +X } +X} +X +X/* show_func_ptr_type - display function_pointer type using old-style syntax */ +X +Xstatic struct token *show_func_ptr_type(t1, t2) +Xstruct token *t1; +Xstruct token *t2; +X{ +X register struct token *s; +X +X /* +X * Rewrite (list1) (list2) to (list1) (). Account for the rare case that +X * (list1) is a comma-separated list. That should be an error, but we do +X * not want to waste any information. +X */ +X +X for (s = t1->head; s; s = s->next) { +X tok_show_ch(s); /* '(' or ',' or ')' */ +X show_type(s); /* recurse */ +X } +X show_empty_list(t2); +X return (t2); +X} +X +X/* show_empty_list - display opening and closing parentheses (if available) */ +X +Xstatic void show_empty_list(t) +Xregister struct token *t; +X{ +X tok_show_ch(t->head); /* opening paren */ +X if (t->tail->tokno == ')') +X tok_show_ch(t->tail); /* closing paren */ +X} +X +X/* show_struct_type - display structured type, rewrite function-pointer types */ +X +Xstatic struct token *show_struct_type(p) +Xregister struct token *p; +X{ +X tok_show(p); /* opening brace */ +X +X while (p->next) { /* XXX cannot return 0 */ +X p = p->next; +X if (IS_FUNC_PTR_TYPE(p)) { +X p = show_func_ptr_type(p, p->next); /* function-pointer member */ +X } else if (p->tokno == '{') { +X p = show_struct_type(p); /* recurse */ +X } else { +X tok_show(p); /* other */ +X if (p->tokno == '}') { +X return (p); /* done */ +X } +X } +X } +X DPRINTF("/* missing '}' */"); +X return (p); +X} +X +X/* is_func_ptr_cast - recognize 
function-pointer type cast */ +X +Xstatic int is_func_ptr_cast(t) +Xregister struct token *t; +X{ +X register struct token *p; +X +X /* +X * Examine superficial structure. Require (list1) (list2). Require that +X * list1 begins with a star. +X */ +X +X if (!IS_FUNC_PTR_TYPE(t)) +X return (0); +X +X /* +X * Make sure that there is no name in (list1). Do not worry about +X * unexpected tokens, because the compiler will complain anyway. +X */ +X +X for (p = t->head->head; p; p = p->next) { +X switch (p->tokno) { +X case TOK_LIST: /* recurse */ +X return (is_func_ptr_cast(p)); +X case TOK_WORD: /* name in list */ +X return (0); +X case '[': +X return (1); /* dimension may be a macro */ +X } +X } +X return (1); /* no name found */ +X} +X +X/* check_cast - display ()-delimited, comma-separated list */ +X +Xstatic void check_cast(t) +Xstruct token *t; +X{ +X register struct token *s; +X register struct token *p; +X +X /* +X * Rewrite function-pointer types and function-pointer casts. Do not +X * blindly rewrite (*list1)(list2) to (*list1)(). Function argument lists +X * are about the only thing we can discard without provoking diagnostics +X * from the compiler. +X */ +X +X for (s = t->head; s; s = s->next) { +X tok_show_ch(s); /* '(' or ',' or ')' */ +X for (p = s->head; p; p = p->next) { +X switch (p->tokno) { +X case TOK_LIST: +X if (is_func_ptr_cast(p)) { /* not: IS_FUNC_PTR_TYPE(p) */ +X p = show_func_ptr_type(p, p->next); +X } else { +X check_cast(p); /* recurse */ +X } +X break; +X case '{': +X p = show_struct_type(p); /* rewrite func. ptr. types */ +X break; +X default: +X tok_show(p); +X break; +X } +X } +X } +X} +X +X/* block_dcls - on the fly rewrite decls/initializers at start of block */ +X +Xstatic void block_dcls() +X{ +X register struct token *t; +X +X /* +X * Away from the top level, a declaration should be preceded by type or +X * storage-class information. 
That is why inside blocks, structs and +X * unions we insist on reading one word before passing the _next_ token +X * to the dcl_flush() function. +X * +X * Struct and union declarations look the same everywhere: we make an +X * exception for these more regular constructs and pass the "struct" and +X * "union" tokens to the type_dcl() function. +X */ +X +X while (t = tok_class()) { +X switch (t->tokno) { +X case TOK_WSPACE: /* preserve white space */ +X case '\n': /* preserve line count */ +X tok_flush(t); +X break; +X case TOK_WORD: /* type declarations? */ +X tok_flush(t); /* advance to next token */ +X t = tok_class(); /* null return is ok */ +X /* FALLTHROUGH */ +X case TOK_COMPOSITE: /* struct or union */ +X if ((t = dcl_flush(t)) == 0) +X break; +X /* FALLTHROUGH */ +X default: /* end of declarations */ +X DPRINTF("/* end dcls */"); +X /* FALLTHROUGH */ +X case '}': /* end of block */ +X tok_unget(t); +X return; +X } +X } +X} +X +X/* block_flush - rewrite struct, union or statement block on the fly */ +X +Xstatic void block_flush(t) +Xregister struct token *t; +X{ +X static int count = 0; +X +X tok_flush(t); +X DPRINTF("/*%d*/", ++count); +X +X /* +X * Rewrite function pointer types in declarations and function pointer +X * casts in initializers at start of block. +X */ +X +X block_dcls(); +X +X /* Remainder of block: only rewrite function pointer casts. 
*/ +X +X while (t = tok_class()) { +X if (t->tokno == TOK_LIST) { +X check_cast_flush(t); +X } else if (t->tokno == '{') { +X block_flush(t); +X } else { +X tok_flush(t); +X if (t->tokno == '}') { +X DPRINTF("/*%d*/", count--); +X return; +X } +X } +X } +X DPRINTF("/* missing '}' */"); +X} +X +X/* pair_flush - on the fly rewrite casts in grouped stuff */ +X +Xstatic void pair_flush(t, start, stop) +Xregister struct token *t; +Xregister int start; +Xregister int stop; +X{ +X tok_flush(t); +X +X while (t = tok_class()) { +X if (t->tokno == start) { /* recurse */ +X pair_flush(t, start, stop); +X } else if (t->tokno == TOK_LIST) { /* expression or cast */ +X check_cast_flush(t); +X } else { /* other, copy */ +X tok_flush(t); +X if (t->tokno == stop) { /* done */ +X return; +X } +X } +X } +X DPRINTF("/* missing '%c' */", stop); +X} +X +X/* initializer - on the fly rewrite casts in initializer */ +X +Xstatic void initializer() +X{ +X register struct token *t; +X +X while (t = tok_class()) { +X switch (t->tokno) { +X case ',': /* list separator */ +X case ';': /* list terminator */ +X tok_unget(t); +X return; +X case TOK_LIST: /* expression or cast */ +X check_cast_flush(t); +X break; +X case '[': /* array subscript, may nest */ +X pair_flush(t, '[', ']'); +X break; +X case '{': /* structured data, may nest */ +X pair_flush(t, '{', '}'); +X break; +X default: /* other, just copy */ +X tok_flush(t); +X break; +X } +X } +X} +X +X/* func_ptr_dcl_flush - rewrite function pointer stuff */ +X +Xstatic struct token *func_ptr_dcl_flush(list) +Xregister struct token *list; +X{ +X register struct token *t; +X register struct token *t2; +X +X /* +X * Ignore blanks and newlines because we are too lazy to maintain more +X * than one token worth of lookahead. The output routines will regenerate +X * discarded newline tokens. 
+X */ +X +X while (t = tok_class()) { +X switch (t->tokno) { +X case TOK_WSPACE: +X case '\n': +X tok_free(t); +X break; +X case TOK_LIST: +X /* Function pointer or function returning pointer to function. */ +X while ((t2 = tok_class()) /* skip blanks etc. */ +X &&(t2->tokno == TOK_WSPACE || t2->tokno == '\n')) +X tok_free(t2); +X switch (t2 ? t2->tokno : 0) { +X case '{': /* function heading (new) */ +X fpf_header(list, t); +X break; +X case TOK_WORD: /* function heading (old) */ +X tok_show(list); +X tok_show(t); +X break; +X default: /* func pointer type */ +X (void) show_func_ptr_type(list, t); +X break; +X } +X tok_free(list); +X tok_free(t); +X if (t2) +X tok_unget(t2); +X return (0); +X default: /* not a declaration */ +X tok_unget(t); +X return (list); +X } +X } +X +X /* Hit EOF; must be mistake, but do not waste any information. */ +X +X return (list); +X} +X +X/* function_dcl_flush - rewrite function { heading, type declaration } */ +X +Xstatic struct token *function_dcl_flush(list) +Xregister struct token *list; +X{ +X register struct token *t; +X +X /* +X * Ignore blanks and newlines because we are too lazy to maintain more +X * than one token worth of lookahead. The output routines will regenerate +X * ignored newline tokens. +X */ +X +X while (t = tok_class()) { +X switch (t->tokno) { +X case TOK_WSPACE: +X case '\n': +X tok_free(t); +X break; +X case '{': +X /* Function heading: word (list) { -> old style heading */ +X header_flush(list); +X tok_unget(t); +X return (0); +X case TOK_WORD: +X /* Old-style function heading: word (list) word... 
*/ +X tok_flush(list); +X tok_unget(t); +X return (0); +X case TOK_LIST: +X /* Function pointer: word (list1) (list2) -> word (list1) () */ +X tok_flush(list); +X show_empty_list(t); +X tok_free(t); +X return (0); +X case ',': +X case ';': +X /* Function type declaration: word (list) -> word () */ +X show_empty_list(list); +X tok_free(list); +X tok_unget(t); +X return (0); +X default: +X /* Something else, reject the list. */ +X tok_unget(t); +X return (list); +X } +X } +X +X /* Hit EOF; must be mistake, but do not waste any information. */ +X +X return (list); +X} +X +X/* dcl_flush - parse declaration on the fly, return rejected token */ +X +Xstatic struct token *dcl_flush(t) +Xregister struct token *t; +X{ +X register int got_word; +X +X /* +X * Away from the top level, type or storage-class information is required +X * for an (extern or forward) function type declaration or a variable +X * declaration. +X * +X * With our naive word-counting approach, this means that the caller should +X * read one word before passing the next token to us. This is how we +X * distinguish, for example, function declarations from function calls. +X * +X * An exception are structs and unions, because they look the same at any +X * level. The caller should give us the "struct" or "union" token. +X */ +X +X for (got_word = 0; t; t = tok_class()) { +X switch (t->tokno) { +X case TOK_WSPACE: /* advance past blanks */ +X case '\n': /* advance past newline */ +X case '*': /* indirection: keep trying */ +X tok_flush(t); +X break; +X case TOK_WORD: /* word: keep trying */ +X case TOK_COMPOSITE: /* struct or union */ +X got_word = 1; +X tok_flush(t); +X break; +X default: +X +X /* +X * Function pointer types can be preceded by zero or more words +X * (at least one when not at the top level). Other stuff can be +X * accepted only after we have seen at least one word (two words +X * when not at the top level). See also the above comment on +X * structs and unions. 
+X */ +X +X if (t->tokno == TOK_LIST && LIST_BEGINS_WITH_STAR(t)) { +X if (t = func_ptr_dcl_flush(t)) { +X return (t); /* reject token */ +X } else { +X got_word = 1; /* for = and [ and , and ; */ +X } +X } else if (got_word == 0) { +X return (t); /* reject token */ +X } else { +X switch (t->tokno) { +X case TOK_LIST: /* function type */ +X if (t = function_dcl_flush(t)) +X return (t); /* reject token */ +X break; +X case '[': /* dimension, does not nest */ +X pair_flush(t, '[', ']'); +X break; +X case '=': /* initializer follows */ +X tok_flush(t); +X initializer(); /* rewrite casts */ +X break; +X case '{': /* struct, union, may nest */ +X block_flush(t); /* use code for stmt blocks */ +X break; +X case ',': /* separator: keep trying */ +X got_word = 0; +X tok_flush(t); +X break; +X case ';': /* terminator: succeed */ +X tok_flush(t); +X return (0); +X default: /* reject token */ +X return (t); +X } +X } +X } +X } +X return (0); /* hit EOF */ +X} +END_OF_unproto.c +if test 27341 -ne `wc -c <unproto.c`; then + echo shar: \"unproto.c\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f tok_io.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"tok_io.c\" +else +echo shar: Extracting \"tok_io.c\" \(15578 characters\) +sed "s/^X//" >tok_io.c <<'END_OF_tok_io.c' +X/*++ +X/* NAME +X/* tok_io 3 +X/* SUMMARY +X/* token I/O +X/* PACKAGE +X/* unproto +X/* SYNOPSIS +X/* #include "token.h" +X/* +X/* struct token *tok_get() +X/* +X/* void tok_flush(t) +X/* struct token *t; +X/* +X/* void tok_show(t) +X/* struct token *t; +X/* +X/* void tok_show_ch(t) +X/* struct token *t; +X/* +X/* void put_str(s) +X/* char *s; +X/* +X/* void put_ch(c) +X/* int c; +X/* +X/* void put_nl() +X/* +X/* char *in_path; +X/* int in_line; +X/* DESCRIPTION +X/* These functions read from stdin and write to stdout. 
The +X/* tokenizer keeps track of where the token appeared in the input +X/* stream; on output, this information is used to preserve correct +X/* line number information (even after lots of token lookahead or +X/* after function-header rewriting) so that diagnostics from the +X/* next compiler stage make sense. +X/* +X/* tok_get() reads the next token from standard input. It returns +X/* a null pointer when the end of input is reached. +X/* +X/* tok_show() displays the contents of a (possibly composite) token +X/* on the standard output. +X/* +X/* tok_show_ch() displays the contents of a single-character token +X/* on the standard output. The character should not be a newline. +X/* +X/* tok_flush() displays the contents of a (possibly composite) token +X/* on the standard output and makes it available for re-use. +X/* +X/* put_str() writes a null-terminated string to standard output. +X/* There should be no newline characters in the string argument. +X/* +X/* put_ch() writes one character to standard output. The character +X/* should not be a newline. +X/* +X/* put_nl() outputs a newline character and adjusts the program's idea of +X/* the current output line. +X/* +X/* The in_path and in_line variables contain the file name and +X/* line number of the most recently read token. +X/* BUGS +X/* The tokenizer is just good enough for the unproto filter. +X/* As a benefit, it is quite fast. +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/01/15 21:52:59 +X/* VERSION/RELEASE +X/* 1.3 +X/*--*/ +X +Xstatic char io_sccsid[] = "@(#) tok_io.c 1.3 92/01/15 21:52:59"; +X +X/* C library */ +X +X#include <stdio.h> +X#include <ctype.h> +X +Xextern char *strchr(); +Xextern char *malloc(); +Xextern char *realloc(); +Xextern char *strcpy(); +X +X/* Application-specific stuff */ +X +X#include "token.h" +X#include "vstring.h" +X#include "error.h" +X +Xextern char *strsave(); /* XXX need include file */ +X +X/* Stuff to keep track of original source file name and position */ +X +Xstatic char def_path[] = ""; /* default path name */ +X +Xchar *in_path = def_path; /* current input file name */ +Xint in_line = 1; /* current input line number */ +X +Xstatic char *out_path = def_path; /* last name in output line control */ +Xstatic int out_line = 1; /* current output line number */ +Xint last_ch; /* type of last output */ +X +X/* Forward declarations */ +X +Xstatic int read_quoted(); +Xstatic void read_comment(); +Xstatic int backslash_newline(); +Xstatic char *read_hex(); +Xstatic char *read_octal(); +Xstatic void fix_line_control(); +X +X /* +X * Character input with one level of pushback. The INPUT() macro recursively +X * strips backslash-newline pairs from the input stream. The UNPUT() macro +X * should be used only for characters obtained through the INPUT() macro. +X * +X * After skipping a backslash-newline pair, the input line counter is not +X * updated, and we continue with the same logical source line. We just +X * update a counter with the number of backslash-newline sequences that must +X * be accounted for (backslash_newline() updates the counter). At the end of +X * the logical source line, an appropriate number of newline characters is +X * pushed back (in tok_get()). I do not know how GCC handles this, but it +X * seems to produce the same output. 
+X * +X * Because backslash_newline() recursively calls itself (through the INPUT() +X * macro), we will run out of stack space, given a sufficiently long +X * sequence of backslash-newline pairs. +X * +X * The push-back cell is an int, not a char: it holds values produced by +X * getchar(), so a char cell would turn byte 0xFF into EOF (signed char) +X * or EOF into 255 (unsigned char) on the next INPUT(). +X */ +X +Xstatic int in_char = 0; /* push-back storage */ +Xstatic int in_flag = 0; /* pushback available */ +Xstatic int nl_compensate = 0; /* line continuation kluge */ +X +X#define INPUT(c) (in_flag ? (in_flag = 0, c = in_char) : \ +X (c = getchar()) != '\\' ? c : \ +X (c = getchar()) != '\n' ? (ungetc(c, stdin), c = '\\') : \ +X (c = backslash_newline())) +X#define UNPUT(c) (in_flag = 1, in_char = c) +X +X/* Directives that should be ignored. */ +X +X#ifdef IGNORE_DIRECTIVES +X +Xstatic char *ignore_directives[] = { +X IGNORE_DIRECTIVES, +X 0, +X}; +X +X#endif +X +X/* +X * Modified string and ctype stuff. ISHEX() rejects NUL explicitly, because +X * strchr() would otherwise match the terminator of the digit string. +X */ +X +X#define STREQUAL(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0) +X +X#define ISALNUM(c) (isalnum(c) || (c) == '_') +X#define ISALPHA(c) (isalpha(c) || (c) == '_') +X#define ISSPACE(c) (isspace(c) && (c) != '\n') +X#define ISDOT(c) ((c) == '.') +X#define ISHEX(c) (isdigit(c) || ((c) != 0 && strchr("abcdefABCDEF", (c)) != 0)) +X#define ISOCTAL(c) (isdigit(c) && (c) != '8' && (c) != '9') +X +X/* Collect all characters that satisfy one condition */ +X +X#define COLLECT(v,c,cond) { \ +X register struct vstring *vs = v; \ +X register char *cp = vs->str; \ +X *cp++ = c; \ +X while (INPUT(c) != EOF) { \ +X if (cond) { \ +X if (VS_ADDCH(vs, cp, c) == 0) \ +X fatal("out of memory"); \ +X } else { \ +X UNPUT(c); \ +X break; \ +X } \ +X } \ +X *cp = 0; \ +X } +X +X/* Ensure that output line information is correct */ +X +X#define CHECK_LINE_CONTROL(p,l) { if (out_path != (p) || out_line != (l)) \ +X fix_line_control((p),(l)); } +X +X/* do_control - parse control line */ +X +Xstatic int do_control() +X{ +X struct token *t; +X int line; +X char *path; +X +X /* Make sure that the directive shows up in the right place. 
*/ +X +X CHECK_LINE_CONTROL(in_path, in_line); +X +X /* +X * Every exit returns 0: the function is declared int, and a bare +X * "return;" or falling off the end is not valid there. +X */ +X +X while (t = tok_get()) { +X switch (t->tokno) { +X +X case TOK_WSPACE: +X /* Ignore blanks after "#" token. */ +X tok_free(t); +X break; +X +X case TOK_NUMBER: +X +X /* +X * Line control is of the form: number pathname junk. Since we +X * have no idea what junk the preprocessor may generate, we copy +X * all line control tokens to stdout. +X */ +X +X put_str("# "); +X line = atoi(t->vstr->str); /* extract line number */ +X tok_flush(t); +X while ((t = tok_get()) && t->tokno == TOK_WSPACE) +X tok_flush(t); /* copy white space */ +X if (t) { /* extract path name */ +X path = (t->tokno == '"') ? strsave(t->vstr->str) : in_path; +X do { +X tok_flush(t); /* copy until newline */ +X } while (t->tokno != '\n' && (t = tok_get())); +X } else { /* EOF right after the number */ +X path = in_path; /* keep the current path name */ +X } +X out_line = in_line = line; /* synchronize */ +X out_path = in_path = path; /* synchronize */ +X return (0); +X +X#ifdef IGNORE_DIRECTIVES +X +X case TOK_WORD: +X +X /* +X * Optionally ignore other #directives. This is only a partial +X * solution, because the preprocessor will still see them. +X */ +X { +X char **cpp; +X char *cp = t->vstr->str; +X +X for (cpp = ignore_directives; *cpp; cpp++) { +X if (STREQUAL(cp, *cpp)) { +X do { +X tok_free(t); +X } while (t->tokno != '\n' && (t = tok_get())); +X return (0); +X } +X } +X } +X /* FALLTHROUGH */ +X#endif +X default: +X /* Pass through. */ +X put_ch('#'); +X do { +X tok_flush(t); +X } while (t->tokno != '\n' && (t = tok_get())); +X return (0); +X +X case 0: +X /* Hit EOF, punt. */ +X put_ch('#'); +X return (0); +X } +X } +X return (0); /* EOF before any directive token */ +X} +X +X/* backslash_newline - fix up things after reading a backslash-newline pair */ +X +Xstatic int backslash_newline() +X{ +X register int c; +X +X nl_compensate++; +X return (INPUT(c)); +X} +X +X/* tok_get - get next token */ +X +Xstatic int last_tokno = '\n'; +X +Xstruct token *tok_get() +X{ +X register struct token *t; +X register int c; +X int d; +X +X /* +X * Get one from the pool and fill it in. 
The loop is here in case we hit +X * a preprocessor control line, which happens in a minority of all cases. +X * We update the token input path and line info *after* backslash-newline +X * processing or the newline compensation would go wrong. +X */ +X +X t = tok_alloc(); +X +X for (;;) { +X if ((INPUT(c)) == EOF) { +X tok_free(t); +X return (0); +X } else if ((t->line = in_line, t->path = in_path), !isascii(c)) { +X t->vstr->str[0] = c; +X t->vstr->str[1] = 0; +X t->tokno = TOK_OTHER; +X break; +X } else if (ISSPACE(c)) { +X COLLECT(t->vstr, c, ISSPACE(c)); +X t->tokno = TOK_WSPACE; +X break; +X } else if (ISALPHA(c)) { +X COLLECT(t->vstr, c, ISALNUM(c)); +X t->tokno = TOK_WORD; +X break; +X } else if (isdigit(c)) { +X COLLECT(t->vstr, c, isdigit(c)); +X t->tokno = TOK_NUMBER; +X break; +X } else if (c == '"' || c == '\'') { +X t->tokno = read_quoted(t->vstr, c); /* detect missing end quote */ +X break; +X } else if (ISDOT(c)) { +X COLLECT(t->vstr, c, ISDOT(c)); +X t->tokno = TOK_OTHER; +X break; +X } else if (c == '#' && last_tokno == '\n') { +X do_control(); +X continue; +X } else { +X t->vstr->str[0] = c; +X if (c == '\n') { +X in_line++; +X if (nl_compensate > 0) { /* compensation for bs-nl */ +X UNPUT('\n'); +X nl_compensate--; +X } +X } else if (c == '/') { +X if ((INPUT(d)) == '*') { +X t->vstr->str[1] = d; /* comment */ +X read_comment(t->vstr); +X t->tokno = TOK_WSPACE; +X break; +X } else { +X if (d != EOF) +X UNPUT(d); +X } +X } else if (c == '\\') { +X t->vstr->str[1] = (INPUT(c) == EOF ? 
0 : c); +X t->vstr->str[2] = 0; +X t->tokno = TOK_OTHER; +X break; +X } +X t->vstr->str[1] = 0; +X t->tokno = c; +X break; +X } +X } +X last_tokno = t->tokno; +X t->end_line = in_line; +X return (t); +X} +X +X/* read_quoted - read string or character literal, canonicalize escapes */ +X +Xstatic int read_quoted(vs, ch) +Xregister struct vstring *vs; +Xint ch; +X{ +X register char *cp = vs->str; +X register int c; +X int ret = TOK_OTHER; +X +X *cp++ = ch; +X +X /* +X * Clobber the token type in case of a premature newline or EOF. This +X * prevents us from attempting to concatenate string constants with +X * broken ones that have no closing quote. +X */ +X +X while (INPUT(c) != EOF) { +X if (c == '\n') { /* newline in string */ +X UNPUT(c); +X break; +X } +X if (VS_ADDCH(vs, cp, c) == 0) /* store character */ +X fatal("out of memory"); +X if (c == ch) { /* closing quote */ +X ret = c; +X break; +X } +X if (c == '\\') { /* parse escape sequence */ +X if ((INPUT(c)) == EOF) { /* EOF, punt */ +X break; +X } else if (c == 'a') { /* \a -> audible bell */ +X if ((cp = vs_strcpy(vs, cp, BELL)) == 0) +X fatal("out of memory"); +X } else if (c == 'x') { /* \xhh -> \nnn */ +X cp = read_hex(vs, cp); +X } else if (ISOCTAL(c) && ch != '\'') { +X cp = read_octal(vs, cp, c); /* canonicalize \octal */ +X } else { +X if (VS_ADDCH(vs, cp, c) == 0) /* \other: leave alone */ +X fatal("out of memory"); +X } +X } +X } +X *cp = 0; +X return (ret); +X} +X +X/* read_comment - stuff a whole comment into one huge token */ +X +Xstatic void read_comment(vs) +Xregister struct vstring *vs; +X{ +X register char *cp = vs->str + 2; /* skip slash star */ +X register int c; +X register int d; +X +X while (INPUT(c) != EOF) { +X if (VS_ADDCH(vs, cp, c) == 0) +X fatal("out of memory"); +X if (c == '*') { +X if ((INPUT(d)) == '/') { +X if (VS_ADDCH(vs, cp, d) == 0) +X fatal("out of memory"); +X break; +X } else { +X if (d != EOF) +X UNPUT(d); +X } +X } else if (c == '\n') { +X in_line++; +X } else if (c == 
'\\') { +X if ((INPUT(d)) != EOF && VS_ADDCH(vs, cp, d) == 0) +X fatal("out of memory"); +X } +X } +X *cp = 0; +X} +X +X/* read_hex - rewrite hex escape to three-digit octal escape */ +X +Xstatic char *read_hex(vs, cp) +Xstruct vstring *vs; +Xregister char *cp; +X{ +X register int c; +X register int i; +X char buf[BUFSIZ]; +X int len; +X unsigned val; +X +X /* +X * Eat up all subsequent hex digits. Complain later when there are too +X * many. +X */ +X +X for (i = 0; i < sizeof(buf) && (INPUT(c) != EOF) && ISHEX(c); i++) +X buf[i] = c; +X buf[i] = 0; +X +X if (i < sizeof(buf) && c) +X UNPUT(c); +X +X /* +X * Convert hex form to three-digit octal form. The three-digit form is +X * used so that strings can be concatenated without problems. Complain +X * about malformed input; truncate the result to at most three octal +X * digits. +X */ +X +X if (i == 0) { +X error("\\x escape sequence without hexadecimal digits"); +X if (VS_ADDCH(vs, cp, 'x') == 0) +X fatal("out of memory"); +X } else { +X (void) sscanf(buf, "%x", &val); +X sprintf(buf, "%03o", val); +X if ((len = strlen(buf)) > 3) +X error("\\x escape sequence yields non-character value"); +X if ((cp = vs_strcpy(vs, cp, buf + len - 3)) == 0) +X fatal("out of memory"); +X } +X return (cp); +X} +X +X/* read_octal - convert octal escape to three-digit format */ +X +Xstatic char obuf[] = "00123"; +X +Xstatic char *read_octal(vs, cp, c) +Xregister struct vstring *vs; +Xregister char *cp; +Xregister int c; +X{ +X register int i; +X +X#define buf_input (obuf + 2) +X +X /* Eat up at most three octal digits. */ +X +X buf_input[0] = c; +X for (i = 1; i < 3 && (INPUT(c) != EOF) && ISOCTAL(c); i++) +X buf_input[i] = c; +X buf_input[i] = 0; +X +X if (i < 3 && c) +X UNPUT(c); +X +X /* +X * Leave three-digit octal escapes alone. Convert one-digit and two-digit +X * octal escapes to three-digit form by prefixing them with a suitable +X * number of '0' characters. 
This is done so that strings can be +X * concatenated without problems. +X */ +X +X if ((cp = vs_strcpy(vs, cp, buf_input + i - 3)) == 0) +X fatal("out of memory"); +X return (cp); +X} +X +X/* put_nl - emit newline and adjust output line count */ +X +Xvoid put_nl() +X{ +X put_ch('\n'); +X out_line++; +X} +X +X/* fix_line_control - to adjust path and/or line count info in output */ +X +Xstatic void fix_line_control(path, line) +Xregister char *path; +Xregister int line; +X{ +X +X /* +X * This function is called sporadically, so it should not be a problem +X * that we repeat some of the tests that preceded this function call. +X * +X * Emit a newline if we are not at the start of a line. +X * +X * If we switch files, or if we jump backwards, emit line control. If we +X * jump forward, emit the proper number of newlines to compensate. +X */ +X +X if (last_ch != '\n') /* terminate open line */ +X put_nl(); +X if (path != out_path || line < out_line) { /* file switch or back jump */ +X printf("# %d %s\n", out_line = line, out_path = path); +X last_ch = '\n'; +X } else { /* forward jump */ +X while (line > out_line) +X put_nl(); +X } +X} +X +X/* tok_show_ch - output single-character token (not newline) */ +X +Xvoid tok_show_ch(t) +Xregister struct token *t; +X{ +X CHECK_LINE_CONTROL(t->path, t->line); +X +X put_ch(t->tokno); /* show token contents */ +X} +X +X/* tok_show - output (possibly composite) token */ +X +Xvoid tok_show(t) +Xregister struct token *t; +X{ +X register struct token *p; +X +X if (t->tokno == TOK_LIST) { +X register struct token *s; +X +X /* +X * This branch is completely in terms of tok_xxx() primitives, so +X * there is no need to check the line control information. 
+X */ +X +X for (s = t->head; s; s = s->next) { +X tok_show_ch(s); /* '(' or ',' or ')' */ +X for (p = s->head; p; p = p->next) +X tok_show(p); /* show list element */ +X } +X } else { +X register char *cp = t->vstr->str; +X +X /* +X * Measurements show that it pays off to give special treatment to +X * single-character tokens. Note that both types of token may cause a +X * change of output line number. +X */ +X +X CHECK_LINE_CONTROL(t->path, t->line); +X if (cp[1] == 0) { +X put_ch(*cp); /* single-character token */ +X } else { +X put_str(cp); /* multi_character token */ +X } +X out_line = t->end_line; /* may span multiple lines */ +X for (p = t->head; p; p = p->next) +X tok_show(p); /* trailing blanks */ +X } +X} +END_OF_tok_io.c +if test 15578 -ne `wc -c <tok_io.c`; then + echo shar: \"tok_io.c\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f tok_class.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"tok_class.c\" +else +echo shar: Extracting \"tok_class.c\" \(11704 characters\) +sed "s/^X//" >tok_class.c <<'END_OF_tok_class.c' +X/*++ +X/* NAME +X/* tok_class 3 +X/* SUMMARY +X/* token classification +X/* PACKAGE +X/* unproto +X/* SYNOPSIS +X/* #include "token.h" +X/* +X/* void tok_unget(t) +X/* struct token *t; +X/* +X/* struct token *tok_class() +X/* DESCRIPTION +X/* tok_class() collects single and composite tokens, and +X/* recognizes keywords. +X/* At present, the only composite tokens are ()-delimited, +X/* comma-separated lists, and non-whitespace tokens with attached +X/* whitespace or comment tokens. +X/* +X/* Source transformations are: __DATE__ and __TIME__ are rewritten +X/* to string constants with the current date and time, respectively. +X/* Multiple string constants are concatenated. Optionally, "void *" +X/* is mapped to "char *", and plain "void" to "int". +X/* +X/* tok_unget() implements an arbitrary amount of token pushback. 
+X/* Only tokens obtained through tok_class() should be given to +X/* tok_unget(). This function accepts a list of tokens in +X/* last-read-first order. +X/* DIAGNOSTICS +X/* The code complains if input terminates in the middle of a list. +X/* BUGS +X/* Does not preserve white space at the beginning of a list element +X/* or after the end of a list. +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/01/15 21:53:02 +X/* VERSION/RELEASE +X/* 1.4 +X/*--*/ +X +Xstatic char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02"; +X +X/* C library */ +X +X#include <stdio.h> +X +Xextern char *strcpy(); +Xextern long time(); +Xextern char *ctime(); +X +X/* Application-specific stuff */ +X +X#include "error.h" +X#include "vstring.h" +X#include "token.h" +X#include "symbol.h" +X +Xstatic struct token *tok_list(); +Xstatic void tok_list_struct(); +Xstatic void tok_list_append(); +Xstatic void tok_strcat(); +Xstatic void tok_time(); +Xstatic void tok_date(); +Xstatic void tok_space_append(); +X +X#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +Xstatic void tok_void(); /* rewrite void keyword */ +X#endif +X +Xstatic struct token *tok_buf = 0; /* token push-back storage */ +X +X/* TOK_PREPEND - add token to LIFO queue, return head */ +X +X#define TOK_PREPEND(list,t) (t->next = list, list = t) +X +X/* tok_space_append - append trailing space except at start of or after list */ +X +Xstatic void tok_space_append(list, t) +Xregister struct token *list; +Xregister struct token *t; +X{ +X +X /* +X * The head/tail fields of a token do triple duty. They are used to keep +X * track of the members that make up a (list); to keep track of the +X * non-blank tokens that make up one list member; and, finally, to tack +X * whitespace and comment tokens onto the non-blank tokens that make up +X * one list member. 
+X * +X * Within a (list), white space and comment tokens are always tacked onto +X * the non-blank tokens to avoid parsing complications later on. For this +X * reason, blanks and comments at the beginning of a list member are +X * discarded because there is no token to tack them onto. (Well, we could +X * start each list member with a dummy token, but that would mess up the +X * whole unprototyper). +X * +X * Blanks or comments that follow a (list) are discarded, because the +X * head/tail fields of a (list) are already being used for other +X * purposes. +X * +X * Newlines within a (list) are discarded because they can mess up the +X * output when we rewrite function headers. The output routines will +X * regenerate discarded newlines, anyway. +X */ +X +X if (list == 0 || list->tokno == TOK_LIST) { +X tok_free(t); +X } else { +X tok_list_append(list, t); +X } +X} +X +X/* tok_class - discriminate single tokens, keywords, and composite tokens */ +X +Xstruct token *tok_class() +X{ +X register struct token *t; +X register struct symbol *s; +X +X /* +X * Use push-back token, if available. Push-back tokens are already +X * canonical and can be passed on to the caller without further +X * inspection. +X */ +X +X if (t = tok_buf) { +X tok_buf = t->next; +X t->next = 0; +X return (t); +X } +X /* Read a new token and canonicalize it. 
*/ +X +X if (t = tok_get()) { +X switch (t->tokno) { +X case '(': /* beginning of list */ +X t = tok_list(t); +X break; +X case TOK_WORD: /* look up keyword */ +X if ((s = sym_find(t->vstr->str))) { +X switch (s->type) { +X case TOK_TIME: /* map __TIME__ to string */ +X tok_time(t); +X tok_strcat(t); /* look for more strings */ +X break; +X case TOK_DATE: /* map __DATE__ to string */ +X tok_date(t); +X tok_strcat(t); /* look for more strings */ +X break; +X#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +X case TOK_VOID: /* optionally map void types */ +X tok_void(t); +X break; +X#endif +X default: /* other keyword */ +X t->tokno = s->type; +X break; +X } +X } +X break; +X case '"': /* string, look for more */ +X tok_strcat(t); +X break; +X } +X } +X return (t); +X} +X +X/* tok_list - collect ()-delimited, comma-separated list of tokens */ +X +Xstatic struct token *tok_list(t) +Xstruct token *t; +X{ +X register struct token *list = tok_alloc(); +X char *filename; +X int lineno; +X +X /* Save context of '(' for diagnostics. */ +X +X filename = t->path; +X lineno = t->line; +X +X list->tokno = TOK_LIST; +X list->head = list->tail = t; +X list->path = t->path; +X list->line = t->line; +X#ifdef DEBUG +X strcpy(list->vstr->str, "LIST"); +X#endif +X +X /* +X * Read until the matching ')' is found, accounting for structured stuff +X * (enclosed by '{' and '}' tokens). Break the list up at each ',' token, +X * and try to preserve as much whitespace as possible. Newlines are +X * discarded so that they will not mess up the layout when we rewrite +X * argument lists. The output routines will regenerate discarded +X * newlines. 
+X */ +X +X while (t = tok_class()) { /* skip blanks */ +X switch (t->tokno) { +X case ')': /* end of list */ +X tok_list_append(list, t); +X return (list); +X case '{': /* struct/union type */ +X tok_list_struct(list->tail, t); +X break; +X case TOK_WSPACE: /* preserve trailing blanks */ +X tok_space_append(list->tail->tail, t); /* except after list */ +X break; +X case '\n': /* fix newlines later */ +X tok_free(t); +X break; +X case ',': /* list separator */ +X tok_list_append(list, t); +X break; +X default: /* other */ +X tok_list_append(list->tail, t); +X break; +X } +X } +X error_where(filename, lineno, "unmatched '('"); +X return (list); /* do not waste any data */ +X} +X +X/* tok_list_struct - collect structured type info within list */ +X +Xstatic void tok_list_struct(list, t) +Xregister struct token *list; +Xregister struct token *t; +X{ +X char *filename; +X int lineno; +X +X /* +X * Save context of '{' for diagnostics. This routine is called by the one +X * that collects list members. If the '}' is not found, the list +X * collector will not see the closing ')' either. +X */ +X +X filename = t->path; +X lineno = t->line; +X +X tok_list_append(list, t); +X +X /* +X * Collect tokens until the matching '}' is found. Try to preserve as +X * much whitespace as possible. Newlines are discarded so that they do +X * not interfere when rewriting argument lists. The output routines will +X * regenerate discarded newlines. 
+X */ +X +X while (t = tok_class()) { +X switch (t->tokno) { +X case TOK_WSPACE: /* preserve trailing blanks */ +X tok_space_append(list->tail, t); /* except after list */ +X break; +X case '\n': /* fix newlines later */ +X tok_free(t); +X break; +X case '{': /* recurse */ +X tok_list_struct(list, t); +X break; +X case '}': /* done */ +X tok_list_append(list, t); +X return; +X default: /* other */ +X tok_list_append(list, t); +X break; +X } +X } +X error_where(filename, lineno, "unmatched '{'"); +X} +X +X/* tok_strcat - concatenate multiple string constants */ +X +Xstatic void tok_strcat(t1) +Xregister struct token *t1; +X{ +X register struct token *t2; +X register struct token *lookahead = 0; +X +X /* +X * Read ahead past whitespace, comments and newlines. If we find a string +X * token, concatenate it with the previous one and push back the +X * intervening tokens (thus preserving as much information as possible). +X * If we find something else, push back all lookahead tokens. +X */ +X +X#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } +X +X while (t2 = tok_class()) { +X switch (t2->tokno) { +X case TOK_WSPACE: /* read past comments/blanks */ +X case '\n': /* read past newlines */ +X TOK_PREPEND(lookahead, t2); +X break; +X case '"': /* concatenate string tokens */ +X if (vs_strcpy(t1->vstr, +X t1->vstr->str + strlen(t1->vstr->str) - 1, +X t2->vstr->str + 1) == 0) +X fatal("out of memory"); +X tok_free(t2); +X PUSHBACK_AND_RETURN; +X default: /* something else, push back */ +X tok_unget(t2); +X PUSHBACK_AND_RETURN; +X } +X } +X PUSHBACK_AND_RETURN; /* hit EOF */ +X} +X +X#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +X +X/* tok_void - support for compilers that have problems with "void" */ +X +Xstatic void tok_void(t) +Xregister struct token *t; +X{ +X register struct token *t2; +X register struct token *lookahead = 0; +X +X /* +X * Look ahead beyond whitespace, comments and newlines until we see a '*' +X * token. 
If one is found, replace "void" by "char". If we find something +X * else, and if "void" should always be mapped, replace "void" by "int". +X * Always push back the lookahead tokens. +X * +X * XXX The code also replaces the (void) argument list; this must be +X * accounted for later on. The alternative would be to add (in unproto.c) +X * TOK_VOID cases all over the place and that would be too error-prone. +X */ +X +X#define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; } +X +X while (t2 = tok_class()) { +X switch (TOK_PREPEND(lookahead, t2)->tokno) { +X case TOK_WSPACE: /* read past comments/blanks */ +X case '\n': /* read past newline */ +X break; +X case '*': /* "void *" -> "char *" */ +X if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0) +X fatal("out of memory"); +X PUSHBACK_AND_RETURN; +X default: +X#ifdef MAP_VOID /* plain "void" -> "int" */ +X if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0) +X fatal("out of memory"); +X#endif +X PUSHBACK_AND_RETURN; +X } +X } +X PUSHBACK_AND_RETURN; /* hit EOF */ +X} +X +X#endif +X +X/* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */ +X +Xstatic void tok_time(t) +Xstruct token *t; +X{ +X long now; +X char *cp; +X char buf[BUFSIZ]; +X +X /* +X * Using sprintf() to select parts of a string is gross, but this should +X * be fast enough. +X */ +X +X (void) time(&now); +X cp = ctime(&now); +X sprintf(buf, "\"%.8s\"", cp + 11); +X if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) +X fatal("out of memory"); +X t->tokno = buf[0]; +X} +X +X/* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */ +X +Xstatic void tok_date(t) +Xstruct token *t; +X{ +X long now; +X char *cp; +X char buf[BUFSIZ]; +X +X /* +X * Using sprintf() to select parts of a string is gross, but this should +X * be fast enough. 
+X */ +X +X (void) time(&now); +X cp = ctime(&now); +X sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20); +X if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0) +X fatal("out of memory"); +X t->tokno = buf[0]; +X} +X +X/* tok_unget - push back one or more possibly composite tokens */ +X +Xvoid tok_unget(t) +Xregister struct token *t; +X{ +X register struct token *next; +X +X do { +X next = t->next; +X TOK_PREPEND(tok_buf, t); +X } while (t = next); +X} +X +X/* tok_list_append - append data to list */ +X +Xstatic void tok_list_append(h, t) +Xstruct token *h; +Xstruct token *t; +X{ +X if (h->head == 0) { +X h->head = h->tail = t; +X } else { +X h->tail->next = t; +X h->tail = t; +X } +X} +END_OF_tok_class.c +if test 11704 -ne `wc -c <tok_class.c`; then + echo shar: \"tok_class.c\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f tok_pool.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"tok_pool.c\" +else +echo shar: Extracting \"tok_pool.c\" \(2175 characters\) +sed "s/^X//" >tok_pool.c <<'END_OF_tok_pool.c' +X/*++ +X/* NAME +X/* tok_pool 3 +X/* SUMMARY +X/* maintain pool of unused token structures +X/* PACKAGE +X/* unproto +X/* SYNOPSIS +X/* #include "token.h" +X/* +X/* struct token *tok_alloc() +X/* +X/* void tok_free(t) +X/* struct token *t; +X/* DESCRIPTION +X/* tok_alloc() and tok_free() maintain a pool of unused token +X/* structures. +X/* +X/* tok_alloc() takes the first free token structure from the pool +X/* or allocates a new one if the pool is empty. +X/* +X/* tok_free() adds a (possibly composite) token structure to the pool. +X/* BUGS +X/* The pool never shrinks. +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/01/15 21:53:04 +X/* VERSION/RELEASE +X/* 1.2 +X/*--*/ +X +Xstatic char pool_sccsid[] = "@(#) tok_pool.c 1.2 92/01/15 21:53:04"; +X +X/* C library */ +X +Xextern char *malloc(); +X +X/* Application-specific stuff */ +X +X#include "token.h" +X#include "vstring.h" +X#include "error.h" +X +X#define TOKLEN 5 /* initial string buffer length */ +X +Xstruct token *tok_pool = 0; /* free token pool */ +X +X/* tok_alloc - allocate token structure from pool or heap */ +X +Xstruct token *tok_alloc() +X{ +X register struct token *t; +X +X if (tok_pool) { /* re-use an old one */ +X t = tok_pool; +X tok_pool = t->next; +X } else { /* create a new one */ +X if ((t = (struct token *) malloc(sizeof(struct token))) == 0 +X || (t->vstr = vs_alloc(TOKLEN)) == 0) +X fatal("out of memory"); +X } +X t->next = t->head = t->tail = 0; +X#ifdef DEBUG +X strcpy(t->vstr->str, "BUSY"); +X#endif +X return (t); +X} +X +X/* tok_free - return (possibly composite) token to pool of free tokens */ +X +Xvoid tok_free(t) +Xregister struct token *t; +X{ +X#ifdef DEBUG +X /* Check if we are freeing free token */ +X +X register struct token *p; +X +X for (p = tok_pool; p; p = p->next) +X if (p == t) +X fatal("freeing free token"); +X#endif +X +X /* Free neighbours and subordinates first */ +X +X if (t->next) +X tok_free(t->next); +X if (t->head) +X tok_free(t->head); +X +X /* Free self */ +X +X t->next = tok_pool; +X t->head = t->tail = 0; +X tok_pool = t; +X#ifdef DEBUG +X strcpy(t->vstr->str, "FREE"); +X#endif +X} +END_OF_tok_pool.c +if test 2175 -ne `wc -c <tok_pool.c`; then + echo shar: \"tok_pool.c\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f vstring.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"vstring.c\" +else +echo shar: Extracting \"vstring.c\" \(3057 characters\) +sed "s/^X//" >vstring.c <<'END_OF_vstring.c' +X/*++ +X/* NAME +X/* vs_alloc(), VS_ADDCH() +X/* SUMMARY +X/* auto-resizing string library +X/* PACKAGE +X/* vstring +X/* SYNOPSIS +X/* #include "vstring.h" +X/* +X/* struct vstring *vs_alloc(len) +X/* int len; +X/* +X/* int VS_ADDCH(vs, wp, ch) +X/* struct vstring *vs; +X/* char *wp; +X/* int ch; +X/* +X/* char *vs_strcpy(vp, dst, src) +X/* struct vstring *vp; +X/* char *dst; +X/* char *src; +X/* DESCRIPTION +X/* These functions and macros implement a small library for +X/* arbitrary-length strings that grow automatically when +X/* they fill up. The allocation strategy is such that there +X/* will always be place for the terminating null character. +X/* +X/* vs_alloc() allocates storage for a variable-length string +X/* of at least "len" bytes. +X/* +X/* VS_ADDCH() adds a character to a variable-length string +X/* and automagically extends the string if fills up. +X/* \fIvs\fP is a pointer to a vstring structure; \fIwp\fP +X/* the current write position in the corresponding character +X/* array; \fIch\fP the character value to be written. +X/* Note that VS_ADDCH() is a macro that evaluates some +X/* arguments more than once. +X/* +X/* vs_strcpy() appends a null-terminated string to a variable-length +X/* string. \fIsrc\fP provides the data to be copied; \fIvp\fP is the +X/* target, and \fIdst\fP the current write position within the target. +X/* The result is null-terminated. The return value is the new write +X/* position. +X/* DIAGNOSTICS +X/* VS_ADDCH() returns zero if it was unable to dynamically +X/* resize a string. +X/* +X/* vs_alloc() returns a null pointer in case of problems. +X/* +X/* vs_strcpy() returns a null pointer if the request failed. 
+X/* BUGS +X/* Auto-resizing may change the address of the string data in +X/* a vstring structure. Beware of dangling pointers. +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/01/15 21:53:06 +X/* VERSION/RELEASE +X/* 1.3 +X/*--*/ +X +Xstatic char vstring_sccsid[] = "@(#) vstring.c 1.3 92/01/15 21:53:06"; +X +X/* C library */ +X +Xextern char *malloc(); +Xextern char *realloc(); +X +X/* Application-specific stuff */ +X +X#include "vstring.h" +X +X/* vs_alloc - initial string allocation */ +X +Xstruct vstring *vs_alloc(len) +Xint len; +X{ +X register struct vstring *vp; +X +X if (len < 1 +X || (vp = (struct vstring *) malloc(sizeof(struct vstring))) == 0 +X || (vp->str = malloc(len)) == 0) +X return (0); +X vp->last = vp->str + len - 1; +X return (vp); +X} +X +X/* vs_realloc - extend string, update write pointer */ +X +Xchar *vs_realloc(vp, cp) +Xregister struct vstring *vp; +Xchar *cp; +X{ +X int where = cp - vp->str; +X int len = vp->last - vp->str + 1; +X +X if ((vp->str = realloc(vp->str, len *= 2)) == 0) +X return (0); +X vp->last = vp->str + len - 1; +X return (vp->str + where); +X} +X +X/* vs_strcpy - copy string */ +X +Xchar *vs_strcpy(vp, dst, src) +Xregister struct vstring *vp; +Xregister char *dst; +Xregister char *src; +X{ +X while (*src) { +X if (VS_ADDCH(vp, dst, *src) == 0) +X return (0); +X src++; +X } +X *dst = '\0'; +X return (dst); +X} +X +END_OF_vstring.c +if test 3057 -ne `wc -c <vstring.c`; then + echo shar: \"vstring.c\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f symbol.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"symbol.c\" +else +echo shar: Extracting \"symbol.c\" \(3187 characters\) +sed "s/^X//" >symbol.c <<'END_OF_symbol.c' +X/*++ +X/* NAME +X/* symbol 3 +X/* SUMMARY +X/* rudimentary symbol table package +X/* SYNOPSIS +X/* #include "symbol.h" +X/* +X/* void sym_init() +X/* +X/* void sym_enter(name, type) +X/* char *name; +X/* int type; +X/* +X/* struct symbol *sym_find(name) +X/* char *name; +X/* DESCRIPTION +X/* This is a rudimentary symbol-table package, just enough to +X/* keep track of a couple of C keywords. +X/* +X/* sym_init() primes the table with C keywords. At present, most of +X/* the keywords that have to do with types are left out. +X/* We need a different strategy to detect type definitions because +X/* we do not keep track of typedef names. +X/* +X/* sym_enter() adds an entry to the symbol table. +X/* +X/* sym_find() locates a symbol table entry (it returns 0 if +X/* it is not found). +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/02/15 18:59:56 +X/* VERSION/RELEASE +X/* 1.4 +X/*--*/ +X +Xstatic char symbol_sccsid[] = "@(#) symbol.c 1.4 92/02/15 18:59:56"; +X +X/* C library */ +X +Xextern char *strcpy(); +Xextern char *malloc(); +X +X/* Application-specific stuff */ +X +X#include "error.h" +X#include "token.h" +X#include "symbol.h" +X +X#define SYM_TABSIZE 20 +X +Xstatic struct symbol *sym_tab[SYM_TABSIZE] = {0,}; +X +X/* More string stuff. Maybe it should go to an #include file. 
*/ +X +X#define STREQ(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0) +X +X/* sym_enter - enter symbol into table */ +X +Xvoid sym_enter(name, type) +Xchar *name; +Xint type; +X{ +X struct symbol *s; +X int where; +X +X if ((s = (struct symbol *) malloc(sizeof(*s))) == 0 +X || (s->name = malloc(strlen(name) + 1)) == 0) +X fatal("out of memory"); +X (void) strcpy(s->name, name); +X s->type = type; +X +X where = hash(name, SYM_TABSIZE); +X s->next = sym_tab[where]; +X sym_tab[where] = s; +X} +X +X/* sym_find - locate symbol definition */ +X +Xstruct symbol *sym_find(name) +Xregister char *name; +X{ +X register struct symbol *s; +X +X /* +X * This function is called for almost every "word" token, so it better be +X * fast. +X */ +X +X for (s = sym_tab[hash(name, SYM_TABSIZE)]; s; s = s->next) +X if (STREQ(name, s->name)) +X return (s); +X return (0); +X} +X +X /* +X * Initialization data for symbol table. We do not enter keywords for types. +X * We use a different strategy to detect type declarations because we do not +X * keep track of typedef names. +X */ +X +Xstruct sym { +X char *name; +X int tokno; +X}; +X +Xstatic struct sym syms[] = { +X "if", TOK_CONTROL, +X "else", TOK_CONTROL, +X "for", TOK_CONTROL, +X "while", TOK_CONTROL, +X "do", TOK_CONTROL, +X "switch", TOK_CONTROL, +X "case", TOK_CONTROL, +X "default", TOK_CONTROL, +X "return", TOK_CONTROL, +X "continue", TOK_CONTROL, +X "break", TOK_CONTROL, +X "goto", TOK_CONTROL, +X "struct", TOK_COMPOSITE, +X "union", TOK_COMPOSITE, +X "__DATE__", TOK_DATE, +X "__TIME__", TOK_TIME, +X#if defined(MAP_VOID_STAR) || defined(MAP_VOID) +X "void", TOK_VOID, +X#endif +X "asm", TOK_OTHER, +X 0, +X}; +X +X/* sym_init - enter known keywords into symbol table */ +X +Xvoid sym_init() +X{ +X register struct sym *p; +X +X for (p = syms; p->name; p++) +X sym_enter(p->name, p->tokno); +X} +X +END_OF_symbol.c +if test 3187 -ne `wc -c <symbol.c`; then + echo shar: \"symbol.c\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f error.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"error.c\" +else +echo shar: Extracting \"error.c\" \(1942 characters\) +sed "s/^X//" >error.c <<'END_OF_error.c' +X/*++ +X/* NAME +X/* error 3 +X/* SUMMARY +X/* diagnostics +X/* PACKAGE +X/* unproto +X/* SYNOPSIS +X/* #include "error.h" +X/* +X/* int errcount; +X/* +X/* void error(text) +X/* char *text; +X/* +X/* void error_where(path, line, text) +X/* char *path; +X/* int line; +X/* char *text; +X/* +X/* void fatal(text) +X/* char *text; +X/* DESCRIPTION +X/* The routines in this file print a diagnostic (text). Some also +X/* terminate the program. Upon each error*() call, the errcount variable +X/* is incremented. +X/* +X/* error() provides a default context, i.e. the source-file +X/* coordinate of the last read token. +X/* +X/* error_where() allows the caller to explicitly specify context: path +X/* is a source-file name, and line is a line number. +X/* +X/* fatal() is like error() but terminates the program with a non-zero +X/* exit status. +X/* +X/* context is ignored if the line number is zero or if the path +X/* is an empty string. +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. 
Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/01/15 21:53:10 +X/* VERSION/RELEASE +X/* 1.2 +X/*--*/ +X +Xstatic char error_sccsid[] = "@(#) error.c 1.2 92/01/15 21:53:10"; +X +X/* C library */ +X +X#include <stdio.h> +X +Xextern void exit(); +X +X/* Application-specific stuff */ +X +X#include "token.h" +X#include "error.h" +X +Xint errcount = 0; /* error counter */ +X +X/* error - report problem (implicit context) */ +X +Xvoid error(text) +Xchar *text; +X{ +X error_where(in_path, in_line, text); +X} +X +X/* error_where - report problem (explicit context) */ +X +Xvoid error_where(path, line, text) +Xchar *path; +Xint line; +Xchar *text; +X{ +X errcount++; +X +X /* Suppress context info if there is none. */ +X +X if (line && path[0]) +X fprintf(stderr, "%s, line %d: ", path, line); +X +X fprintf(stderr, "%s\n", text); +X} +X +X/* fatal - report problem and terminate unsuccessfully */ +X +Xvoid fatal(text) +Xchar *text; +X{ +X error(text); +X exit(1); +X} +END_OF_error.c +if test 1942 -ne `wc -c <error.c`; then + echo shar: \"error.c\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f hash.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"hash.c\" +else +echo shar: Extracting \"hash.c\" \(1298 characters\) +sed "s/^X//" >hash.c <<'END_OF_hash.c' +X/*++ +X/* NAME +X/* hash 3 +X/* SUMMARY +X/* compute hash value for string +X/* SYNOPSIS +X/* int hash(string, size) +X/* char *string; +X/* int size; +X/* DESCRIPTION +X/* This function computes for the given null-terminated string an +X/* integer hash value in the range 0..size-1. +X/* SEE ALSO +X/* .fi +X/* Alfred V. Aho, Ravi Sethi and Jeffrey D. Ullman: Compilers: +X/* principles, techniques and tools; Addison-Wesley, Amsterdam, 1986. +X/* AUTHOR(S) +X/* Wietse Venema +X/* Eindhoven University of Technology +X/* Department of Mathematics and Computer Science +X/* Den Dolech 2, P.O. 
/*++
/* NAME
/*      hash 3
/* SUMMARY
/*      compute hash value for string
/* SYNOPSIS
/*      int hash(string, size)
/*      char *string;
/*      unsigned size;
/* DESCRIPTION
/*      This function computes for the given null-terminated string an
/*      integer hash value in the range 0..size-1.
/*
/*      Characters are treated as unsigned quantities, so that the
/*      result does not depend on whether the compiler's plain "char"
/*      type is signed.
/* DIAGNOSTICS
/*      A table size of zero would cause a division by zero; hash()
/*      returns 0 in that case.
/* SEE ALSO
/* .fi
/*      Alfred V. Aho, Ravi Sethi and Jeffrey D. Ullman: Compilers:
/*      principles, techniques and tools; Addison-Wesley, Amsterdam, 1986.
/* AUTHOR(S)
/*      Wietse Venema
/*      Eindhoven University of Technology
/*      Department of Mathematics and Computer Science
/*      Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
/*
/*      Originally written by: P. J. Weinberger at Bell Labs.
/* LAST MODIFICATION
/*      92/01/15 21:53:12
/* VERSION/RELEASE
/*      %I
/*--*/

static char hash_sccsid[] = "@(#) hash.c 1.1 92/01/15 21:53:12";

/* hash - hash a string; original author: P. J. Weinberger at Bell Labs. */

int     hash(s, size)
register char *s;
unsigned size;
{
    register unsigned long h = 0;
    register unsigned long g;

    /*
     * For a performance comparison with the hash function presented in K&R,
     * first edition, see the "Dragon" book by Aho, Sethi and Ullman.
     */

    if (size == 0)                      /* avoid h % 0 below */
        return (0);

    while (*s) {

        /*
         * Mask instead of adding the plain char: a sign-extended negative
         * value would turn on all high-order bits of h.
         */

        h = (h << 4) + (*s++ & 0377);

        /* Fold the top four bits (of 32) back into the low-order bits. */

        if ((g = (h & 0xf0000000)) != 0) {
            h ^= (g >> 24);
            h ^= g;
        }
    }
    return ((int) (h % size));
}
Box 513, 5600 MB Eindhoven, The Netherlands +X/* LAST MODIFICATION +X/* 92/01/15 21:53:13 +X/* VERSION/RELEASE +X/* 1.1 +X/*--*/ +X +Xstatic char strsave_sccsid[] = "@(#) strsave.c 1.1 92/01/15 21:53:13"; +X +X/* C library */ +X +Xextern char *strcpy(); +Xextern char *malloc(); +X +X/* Application-specific stuff */ +X +X#include "error.h" +X +X#define STR_TABSIZE 100 +X +Xstruct string { +X char *strval; /* unique string copy */ +X struct string *next; /* next one in hash chain */ +X}; +X +Xstatic struct string *str_tab[STR_TABSIZE] = {0,}; +X +X/* More string stuff. Maybe it should go to an #include file. */ +X +X#define STREQ(x,y) (*(x) == *(y) && strcmp((x),(y)) == 0) +X +X/* strsave - save unique copy of string */ +X +Xchar *strsave(str) +Xregister char *str; +X{ +X register struct string *s; +X register int where = hash(str, STR_TABSIZE); +X +X /* Look for existing entry. */ +X +X for (s = str_tab[where]; s; s = s->next) +X if (STREQ(str, s->strval)) +X return (s->strval); +X +X /* Add new entry. */ +X +X if ((s = (struct string *) malloc(sizeof(*s))) == 0 +X || (s->strval = malloc(strlen(str) + 1)) == 0) +X fatal("out of memory"); +X s->next = str_tab[where]; +X str_tab[where] = s; +X return (strcpy(s->strval, str)); +X} +END_OF_strsave.c +if test 1610 -ne `wc -c <strsave.c`; then + echo shar: \"strsave.c\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f error.h -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"error.h\" +else +echo shar: Extracting \"error.h\" \(228 characters\) +sed "s/^X//" >error.h <<'END_OF_error.h' +X/* @(#) error.h 1.2 92/01/15 21:53:14 */ +X +Xextern int errcount; /* error counter */ +Xextern void error(); /* default context */ +Xextern void error_where(); /* user-specified context */ +Xextern void fatal(); /* fatal error */ +END_OF_error.h +if test 228 -ne `wc -c <error.h`; then + echo shar: \"error.h\" unpacked with wrong size! 
/*
 * The declarations below are the contents of the token.h header file.
 */

/* @(#) token.h 1.4 92/01/15 21:53:17 */

struct token {
    int     tokno;                      /* token value, see below */
    char   *path;                       /* file name */
    int     line;                       /* line number at token start */
    int     end_line;                   /* line number at token end */
    struct vstring *vstr;               /* token contents */
    struct token *next;                 /* next token in the same list */
    struct token *head;                 /* first member of a composite */
    struct token *tail;                 /* last member of a composite */
};

/* Special token values */

#define TOK_LIST        256             /* () delimited list */
#define TOK_WORD        257             /* keyword or identifier */
#define TOK_NUMBER      258             /* one or more digits */
#define TOK_WSPACE      259             /* comment, white space, not newline */
#define TOK_OTHER       260             /* other token */
#define TOK_CONTROL     261             /* flow control keyword */
#define TOK_COMPOSITE   262             /* struct or union keyword */
#define TOK_DATE        263             /* date: Mmm dd yyyy */
#define TOK_TIME        264             /* time: hh:mm:ss */
#define TOK_VOID        265             /* void keyword */

/* Input/output functions and macros */

extern struct token *tok_get();         /* read next single token */
extern void tok_show();                 /* display (composite) token */
extern struct token *tok_class();       /* classify tokens */
extern void tok_unget();                /* stuff token back into input */
extern void put_nl();                   /* print newline character */
extern void tok_show_ch();              /* emit single-character token */

/* Beware: tok_flush() evaluates its argument twice. */

#define tok_flush(t)    (tok_show(t), tok_free(t))

/* The output macros record in last_ch what was written last. */

#ifdef DEBUG
#define put_ch(c)       (putchar(last_ch = (c)),fflush(stdout))
#define put_str(s)      (fputs((s),stdout),last_ch = 0,fflush(stdout))
#else
#define put_ch(c)       putchar(last_ch = (c))
#define put_str(s)      (fputs((s),stdout),last_ch = 0)
#endif

/* Memory management */

extern struct token *tok_alloc();       /* allocate token storage */
extern void tok_free();                 /* re-cycle storage */

/* Context */

extern char *in_path;                   /* current input path name */
extern int in_line;                     /* current input line number */
extern int last_ch;                     /* type of last output */

/*
 * The declarations below are the contents of the vstring.h header file.
 */

/* @(#) vstring.h 1.2 92/01/15 21:53:19 */

struct vstring {
    char   *str;                        /* string value */
    char   *last;                       /* last position */
};

extern struct vstring *vs_alloc();      /* initial allocation */
extern char *vs_realloc();              /* string extension */
extern char *vs_strcpy();               /* copy string */

/*
 * Macro to add one character to auto-resized string. The write position wp
 * must be an lvalue; vs and wp are evaluated more than once. The result is
 * the character written, or zero when the string could not be extended, so
 * the result is also zero after writing a null character.
 */

#define VS_ADDCH(vs,wp,c) \
    (((wp) < (vs)->last || ((wp) = vs_realloc((vs),(wp)))) ? \
        (*(wp)++ = (c)) : 0)

/*
 * The declarations below are the contents of the symbol.h header file.
 */

/* @(#) symbol.h 1.1 91/09/22 21:21:42 */

struct symbol {
    char   *name;                       /* symbol name */
    int     type;                       /* symbol type */
    struct symbol *next;
};

extern void sym_enter();                /* add symbol to table */
extern struct symbol *sym_find();       /* locate symbol */
extern void sym_init();                 /* prime the table */
+fi +# end of overwriting check +fi +if test -f Makefile -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"Makefile\" +else +echo shar: Extracting \"Makefile\" \(4431 characters\) +sed "s/^X//" >Makefile <<'END_OF_Makefile' +X# @(#) Makefile 1.6 93/06/18 22:29:40 +X +X## BEGIN CONFIGURATION STUFF +X +X# In the unlikely case that your compiler has no hooks for alternate +X# compiler passes, use a "cc cflags -E file.c | unproto >file.i" +X# pipeline, then "cc cflags -c file.i" to compile the resulting +X# intermediate file. +X# +X# Otherwise, the "/lib/cpp | unproto" pipeline can be packaged as an +X# executable shell script (see the provided "cpp.sh" script) that should +X# be installed as "/whatever/cpp". This script should then be specified +X# to the C compiler as a non-default preprocessor. +X# +X# PROG = unproto +X# PIPE = +X +X# The overhead and problems of shell script interpretation can be +X# eliminated by having the unprototyper program itself open the pipe to +X# the preprocessor. In that case, define the PIPE_THROUGH_CPP macro as +X# the path name of the default C preprocessor (usually "/lib/cpp"), +X# install the unprototyper as "/whatever/cpp" and specify that to the C +X# compiler as a non-default preprocessor. +X# +XPROG = cpp +XPIPE = -DPIPE_THROUGH_CPP=\"/lib/cpp\" +X +X# Some compilers complain about some #directives. The following is only a +X# partial solution, because the directives are still seen by /lib/cpp. +X# Be careful with filtering out #pragma, because some pre-ANSI compilers +X# (SunOS) rely on its use. +X# +X# SKIP = -DIGNORE_DIRECTIVES=\"pragma\",\"foo\",\"bar\" +X# +XSKIP = +X +X# The bell character code depends on the character set. With ASCII, it is +X# 7. Specify a string constant with exactly three octal digits. If you +X# change this definition, you will have to update the example.out file. +X# +XBELL = -DBELL=\"007\" +X +X# Some C compilers have problems with "void". 
The nature of the problems +X# depends on the age of the compiler. +X# +X# If your compiler does not understand "void" at all, compile with +X# -DMAP_VOID. The unprototyper will replace "void *" by "char *", a +X# (void) argument list by an empty one, and will replace all other +X# instances of "void" by "int". +X# +X# If your compiler has problems with "void *" only, compile with +X# -DMAP_VOID_STAR. The unprototyper will replace "void *" by "char *", +X# and will replace a (void) argument list by an empty one. All other +X# instances of "void" will be left alone. +X# +X# If neither of these are defined, (void) argument lists will be replaced +X# by empty ones. +X# +X# MAP = -DMAP_VOID_STAR +X +X# Now that we have brought up the subject of antique C compilers, here's +X# a couple of aliases that may be useful, too. +X# +X# ALIAS = -Dstrchr=index +X +X# If you need support for functions that implement ANSI-style variable +X# length argument lists, edit the stdarg.h file provided with this +X# package so that it contains the proper definitions for your machine. +X +X## END CONFIGURATION STUFF +X +XSHELL = /bin/sh +X +XCFILES = unproto.c tok_io.c tok_class.c tok_pool.c vstring.c symbol.c error.c \ +X hash.c strsave.c +XHFILES = error.h token.h vstring.h symbol.h +XSCRIPTS = cpp.sh acc.sh +XSAMPLES = stdarg.h stddef.h stdlib.h varargs.c example.c example.out +XSOURCES = README $(CFILES) $(HFILES) Makefile $(SCRIPTS) $(SAMPLES) +XFILES = $(SOURCES) unproto.1 +XOBJECTS = tok_io.o tok_class.o tok_pool.o unproto.o vstring.o symbol.o error.o \ +X hash.o strsave.o +X +XCFLAGS = -O $(PIPE) $(SKIP) $(BELL) $(MAP) $(ALIAS) +X#CFLAGS = -O $(PIPE) $(SKIP) $(BELL) $(MAP) $(ALIAS) -p -Dstatic= +X#CFLAGS = -g $(PIPE) $(SKIP) $(BELL) $(MAP) $(ALIAS) -DDEBUG +X +X$(PROG): $(OBJECTS) +X $(CC) $(CFLAGS) -o $@ $(OBJECTS) $(MALLOC) +X +X# For linting, enable all bells and whistles. 
+X +Xlint: +X lint -DPIPE_THROUGH_CPP=\"foo\" -DIGNORE_DIRECTIVES=\"foo\",\"bar\" \ +X $(BELL) -DMAP_VOID $(ALIAS) $(CFILES) +X +X# Testing requires that the program is compiled with -DDEBUG. +X +Xtest: $(PROG) cpp example.c example.out +X ./cpp example.c >example.tmp +X @echo the following diff command should produce no output +X diff -b example.out example.tmp +X rm -f example.tmp +X +Xshar: $(FILES) +X @shar $(FILES) +X +Xarchive: +X $(ARCHIVE) $(SOURCES) +X +Xclean: +X rm -f *.o core cpp unproto mon.out varargs.o varargs example.tmp +X +Xerror.o : error.c token.h error.h Makefile +Xhash.o : hash.c Makefile +Xstrsave.o : strsave.c error.h Makefile +Xsymbol.o : symbol.c error.h token.h symbol.h Makefile +Xtok_class.o : tok_class.c error.h vstring.h token.h symbol.h Makefile +Xtok_io.o : tok_io.c token.h vstring.h error.h Makefile +Xtok_pool.o : tok_pool.c token.h vstring.h error.h Makefile +Xunproto.o : unproto.c vstring.h stdarg.h token.h error.h symbol.h Makefile +Xvarargs.o : varargs.c stdarg.h Makefile +Xvstring.o : vstring.c vstring.h Makefile +END_OF_Makefile +if test 4431 -ne `wc -c <Makefile`; then + echo shar: \"Makefile\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f cpp.sh -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"cpp.sh\" +else +echo shar: Extracting \"cpp.sh\" \(823 characters\) +sed "s/^X//" >cpp.sh <<'END_OF_cpp.sh' +X#!/bin/sh +X +X# @(#) cpp.sh 1.3 92/01/15 21:53:22 +X +X# Unprototypeing preprocessor for pre-ANSI C compilers. On some systems, +X# this script can be as simple as: +X# +X# /lib/cpp "$@" | unproto +X# +X# However, some cc(1) drivers specify output file names on the +X# preprocessor command line, so this shell script must be prepared to +X# intercept them. Depending on the driver program, the cpp options may +X# even go before or after the file name argument(s). The script below +X# tries to tackle all these cases. 
+X# +X# You may want to add -Ipath_to_stdarg.h_file, -Dvoid=, -Dvolatile=, +X# and even -D__STDC__. +X +Xcpp_args="" +X +Xwhile : +Xdo +X case $1 in +X "") break;; +X -*) cpp_args="$cpp_args $1";; +X *) cpp_args="$cpp_args $1" +X case $2 in +X ""|-*) ;; +X *) exec 1> $2 || exit 1; shift;; +X esac;; +X esac +X shift +Xdone +X +X/lib/cpp $cpp_args | unproto +END_OF_cpp.sh +if test 823 -ne `wc -c <cpp.sh`; then + echo shar: \"cpp.sh\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f acc.sh -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"acc.sh\" +else +echo shar: Extracting \"acc.sh\" \(1242 characters\) +sed "s/^X//" >acc.sh <<'END_OF_acc.sh' +X#!/bin/sh +X +X# @(#) acc.sh 1.1 93/06/18 22:29:42 +X# +X# Script to emulate most of an ANSI C compiler with a traditional UNIX +X# C compiler. +X +X# INCDIR should be the directory with auxiliary include files from the +X# unproto source distribution (stdarg.h, stdlib.h, stddef.h, and other +X# stuff that is missing from your compilation environment). With Ultrix +X# 4.[0-2] you need unproto's stdarg.h even though the system provides +X# one. +X# +XINCDIR=. +X +X# CPPDIR should be the directory with the unprototypeing cpp filter +X# (preferably the version with the PIPE_THROUGH_CPP feature). +X# +XCPPDIR=. +X +X# DEFINES: you will want to define volatile and const, and maybe even +X# __STDC__. +X# +XDEFINES="-Dvolatile= -Dconst= -D__STDC__" +X +X# Possible problem: INCDIR should be listed after the user-specified -I +X# command-line options, not before them as we do here. This is a problem +X# only if you attempt to redefine system libraries. +X# +X# Choose one of the commands below that is appropriate for your system. 
+X# +Xexec cc -Qpath ${CPPDIR} -I${INCDIR} ${DEFINES} "$@" # SunOS 4.x +Xexec cc -tp -h${CPPDIR} -B -I${INCDIR} ${DEFINES} "$@" # Ultrix 4.2 +Xexec cc -Yp,${CPPDIR} -I${INCDIR} ${DEFINES} "$@" # M88 SysV.3 +Xexec cc -B${CPPDIR}/ -tp -I${INCDIR} ${DEFINES} "$@" # System V.2 +END_OF_acc.sh +if test 1242 -ne `wc -c <acc.sh`; then + echo shar: \"acc.sh\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f stdarg.h -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"stdarg.h\" +else +echo shar: Extracting \"stdarg.h\" \(3075 characters\) +sed "s/^X//" >stdarg.h <<'END_OF_stdarg.h' +X /* +X * @(#) stdarg.h 1.4 93/06/18 22:29:44 +X * +X * Sample stdarg.h file for use with the unproto filter. +X * +X * This file serves two purposes. +X * +X * 1 - On systems that do not have a /usr/include/stdarg.h file, it should be +X * included by C source files that implement ANSI-style variadic functions. +X * Ultrix 4.[0-2] comes with stdarg.h but still needs the one that is +X * provided with the unproto filter. +X * +X * 2 - To configure the unprototyper itself. If the _VA_ALIST_ macro is +X * defined, its value will appear in the place of the "..." at the end of +X * argument lists of variadic function *definitions* (not declarations). +X * Some compilers (such as Greenhills m88k) have a non-empty va_dcl +X * definition in the system header file varargs.h. If that is the case, +X * define "_VA_DCL_" with the same value as va_dcl. If _VA_DCL_ is defined, +X * the unprototyper will emit its value just before the opening "{". +X * +X * Compilers that always pass arguments via the stack can use the default code +X * at the end of this file (this usually applies for the vax, mc68k and +X * 80*86 architectures). +X * +X * Special tricks are needed for compilers that pass some or all function +X * arguments via registers. Examples of the latter are given for the mips +X * and sparc architectures. 
Usually the compiler special-cases an argument +X * declaration such as "va_alist" or "__builtin_va_alist". For inspiration, +X * see the local /usr/include/varargs.h file. +X * +X * You can use the varargs.c program provided with the unproto package to +X * verify that the stdarg.h file has been set up correctly. +X */ +X +X#ifdef sparc /* tested with SunOS 4.1.1 */ +X +X#define _VA_ALIST_ "__builtin_va_alist" +Xtypedef char *va_list; +X#define va_start(ap, p) (ap = (char *) &__builtin_va_alist) +X#define va_arg(ap, type) ((type *) __builtin_va_arg_incr((type *) ap))[0] +X#define va_end(ap) +X +X#else +X#ifdef mips /* tested with Ultrix 4.0 and 4.2 */ +X +X#define _VA_ALIST_ "va_alist" +X#include "/usr/include/stdarg.h" +X +X#else +X#ifdef m88k /* Motorola SYSTEM V/88 R32V3 */ +X +X#define _VA_ALIST_ "va_alist" +X#define _VA_DCL_ "va_type va_alist;" +Xtypedef struct _va_struct { +X int va_narg; +X int *va_stkaddr; +X int *va_iregs; +X} va_list; +X#define va_start(ap, p) \ +X((ap).va_narg=(int *)&va_alist-va_stkarg, \ +X (ap).va_stkaddr=va_stkarg, \ +X (ap).va_iregs=(int *)va_intreg) +X#define va_end(p) +X#if defined(LittleEndian) +X#define va_arg(p,mode) \ +X (*(mode *)_gh_va_arg(&p, va_align(mode), va_regtyp(mode), sizeof(mode))) +X#else /* defined(LittleEndian) */ +X#define va_arg(p,mode) ( \ +X (p).va_narg += ((p).va_narg & (va_align(mode) == 8)) + \ +X (sizeof(mode)+3)/4, \ +X ((mode *)((va_regtyp(mode) && (p).va_narg <= 8 ? \ +X (p).va_iregs: \ +X (p).va_stkaddr) + (p).va_narg))[-1]) +X#endif /* defined(LittleEndian) */ +X +X#else /* vax, mc68k, 80*86 */ +X +Xtypedef char *va_list; +X#define va_start(ap, p) (ap = (char *) (&(p)+1)) +X#define va_arg(ap, type) ((type *) (ap += sizeof(type)))[-1] +X#define va_end(ap) +X +X#endif /* m88k */ +X#endif /* mips */ +X#endif /* sparc */ +END_OF_stdarg.h +if test 3075 -ne `wc -c <stdarg.h`; then + echo shar: \"stdarg.h\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f stddef.h -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"stddef.h\" +else +echo shar: Extracting \"stddef.h\" \(587 characters\) +sed "s/^X//" >stddef.h <<'END_OF_stddef.h' +X/* @(#) stddef.h 1.1 92/02/15 17:25:46 */ +X +X#ifndef _stddef_h_ +X#define _stddef_h_ +X +X/* NULL is also defined in <stdio.h> */ +X +X#ifndef NULL +X#define NULL 0 +X#endif +X +X/* Structure member offset - some compilers barf on this. */ +X +X#define offsetof(type, member) ((size_t) &((type *)0)->member) +X +X/* Some of the following types may already be defined in <sys/types.h>. */ +X +X/* #include <sys/types.h> */ +X/* typedef long ptrdiff_t; /* type of pointer difference */ +X/* typedef unsigned short wchar_t; /* wide character type */ +X/* typedef unsigned size_t; /* type of sizeof */ +X +X#endif /* _stddef_h_ */ +END_OF_stddef.h +if test 587 -ne `wc -c <stddef.h`; then + echo shar: \"stddef.h\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f stdlib.h -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"stdlib.h\" +else +echo shar: Extracting \"stdlib.h\" \(1004 characters\) +sed "s/^X//" >stdlib.h <<'END_OF_stdlib.h' +X/* @(#) stdlib.h 1.1 92/02/15 17:25:45 */ +X +X#ifndef _stdlib_h_ +X#define _stdlib_h_ +X +X/* NULL is also defined in <stdio.h> */ +X +X#ifndef NULL +X#define NULL 0 +X#endif +X +X/* +X * Some functions in this file will be missing from the typical pre-ANSI +X * UNIX library. Some pre-ANSI UNIX library functions have return types +X * that differ from what ANSI requires. 
+X */ +X +Xextern double atof(); +Xextern int atoi(); +Xextern long atol(); +Xextern double strtod(); +Xextern long strtol(); +Xextern unsigned long strtoul(); +Xextern int rand(); +Xextern void srand(); +Xextern char *calloc(); +Xextern char *malloc(); +Xextern char *realloc(); +Xextern void free(); +Xextern void abort(); +Xextern void exit(); +Xextern int atextit(); +Xextern int system(); +Xextern char *getenv(); +Xextern char *bsearch(); +Xextern void qsort(); +Xextern int abs(); +Xextern long labs(); +X +Xtypedef struct { +X int quot; +X int rem; +X} div_t; +X +Xtypedef struct { +X long quot; +X long rem; +X} ldiv_t; +X +Xextern div_t div(); +Xextern ldiv_t ldiv(); +X +X#endif /* _stdlib_h_ */ +END_OF_stdlib.h +if test 1004 -ne `wc -c <stdlib.h`; then + echo shar: \"stdlib.h\" unpacked with wrong size! +fi +# end of overwriting check +fi +if test -f varargs.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"varargs.c\" +else +echo shar: Extracting \"varargs.c\" \(606 characters\) +sed "s/^X//" >varargs.c <<'END_OF_varargs.c' +X /* +X * @(#) varargs.c 1.1 91/09/01 23:08:45 +X * +X * This program can be used to verify that the stdarg.h file is set up +X * correctly for your system. If it works, it should print one line with the +X * text "stdarg.h works". +X */ +X +X#include <stdio.h> +X#include "stdarg.h" +X +Xmain(int argc, char *argv[]) +X{ +X varargs_test("%s %s\n", "stdarg.h", "works"); +X} +X +Xvarargs_test(char *fmt, ...) +X{ +X va_list ap; +X +X va_start(ap, fmt); +X while (*fmt) { +X if (strncmp("%s", fmt, 2) == 0) { +X fputs(va_arg(ap, char *), stdout); +X fmt += 2; +X } else { +X putchar(*fmt); +X fmt++; +X } +X } +X va_end(ap); +X} +END_OF_varargs.c +if test 606 -ne `wc -c <varargs.c`; then + echo shar: \"varargs.c\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f example.c -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"example.c\" +else +echo shar: Extracting \"example.c\" \(3525 characters\) +sed "s/^X//" >example.c <<'END_OF_example.c' +X /* +X * @(#) example.c 1.5 93/06/18 22:29:46 +X * +X * Examples of things that can be done with the unproto package +X */ +X +Xtypedef char *charstar; +X +X /* +X * New-style argument list with structured argument, one field being pointer +X * to function returning pointer to function with function-pointer argument +X */ +X +Xx(struct { +X struct { +X int (*(*foo) (int (*arg1) (double))) (float arg2); +X } foo; +X} baz) { +X return (0); +X} +X +X /* New-style function-pointer declaration. */ +X +Xint (*(*bar0) (float)) (int); +X +X /* Old-style argument list with new-style argument type. */ +X +Xbaz0(bar) +Xint (*(*bar) (float)) (int); +X{} +X +X /* +X * New-style argument list with new-style argument type, declaration +X * embedded within block. Plus a couple assignments with function calls that +X * look like casts. 
+X */ +X +Xfoo(int (*(*bar) (float)) (int)) +X{ +X int (*baz) (int) = (int (*) (int)) 0, +X y = (y * (*baz) (y)), +X *(*z) (int) = (int *(*) (int)) 0; +X +X struct { int (*foo)(int); } *(*s)(int) = +X (struct { int (*foo)(int); } *(*)(int)) 0; +X +X { +X y = (y * (*baz) (y)); +X } +X { +X z = (int *(*) (int)) 0; +X } +X { +X s = (struct { int (*foo)(int); } *(*)(int)) 0; +X } +X +X return (0); +X} +X +X/* Multiple declarations in one statement */ +X +Xtest1() +X{ +X int foo2,*(*(*bar)(int))(float),*baz(double); +X} +X +X/* Discriminate declarations from executable statements */ +X +Xtest2(charstar y) +X{ +X int foo = 5,atoi(charstar); +X +X foo = 5,atoi(y); +X} +X +X/* Declarations without explicit type */ +X +Xtest3,test4(int); +X +Xtest5(int y) +X{ +X { +X test3; +X } +X { +X test4(y); +X } +X} +X +Xtest6[1],test7(int); +X +Xtest7(int x) +X{ +X { +X test6[1]; +X } +X { +X test7(x); +X } +X} +X +X/* Checking a complicated cast */ +X +Xstruct { +X struct { +X int (*f)(int), o; +X } bar; +X} (*baz2)(int) = (struct { struct { int (*f)(int), o; } bar; } (*)(int)) 0; +X +X/* Distinguish things with the same shape but with different meaning */ +X +Xtest8(x) +X{ +X { +X struct { +X int foo; +X } bar(charstar); +X } +X { +X do { +X int foo; +X } while (x); +X } +X} +X +X/* Do not think foo(*bar) is a function pointer declaration */ +X +Xtest9(char *bar) +X{ +X foo(*bar); +X} +X +X/* another couple of special-cased words. 
*/ +X +Xtest10(int x) +X{ +X { +X int test10(int); +X do test10(x); +X while (x); +X } +X { +X return test10(x); +X } +X} +X +Xtest11(int *x) +X{ +X while (*x) +X (putchar(*x++)); +X} +X +Xtest11a(int *x) +X{ +X for (*x;;) +X (putchar(*x++)); +X} +X +X/* #include directive between stuff that requires lookahead */ +X +Xtest12() +X{ +X char *x = "\xf\0002\002\02\2" /* foo */ +X#include "/dev/null" +X "\abar"; +X +X printf("foo" /* 1 */ "bar" /* 2 */ "baz"); +X +X *x = '\a'; +X *x = '\xff'; +X} +X +Xint test13(void); +X +X/* line continuations in the middle of tokens */ +X +Xte\ +Xst14(); +Xcharstar test15 = "foo\ +Xbar"; +Xchar test16 = "foo\\ +Xabar"; +X +X/* Array dimensions with unexpanded macros */ +X +Xtest17(charstar foo[bar]){} +X +Xint (*(*test18[bar])(charstar))(charstar) = \ +X (int (*(*[bar])(charstar))(charstar)) 0; +X +X/* Function returning pointer to function */ +X +Xint (*(*test19(long))(int))(double); +X +X/* GCC accepts the following stuff, K&R C does not... */ +X +Xvoid test20(int test21(double)) {} +X +Xvoid test22(struct { int foo; } test23(short)) {} +X +X/* Do not blindly rewrite (*name(stuff))(otherstuff) */ +X +Xvoid test23() +X{ +X int (*test24(int)) (int), +X y = (*test24(2)) (3), +X z = ((*test24(2)) (3)); +X} +X +X/* Function returning pointer to function */ +X +Xint (*(*test25(long foo))(int bar))(double baz){ /* body */ } +X +Xint (*(*test26(foo))())() +Xlong foo; +X{ /* body */ } +X +X#define ARGSTR() struct {int l; char c[1];} +X +Xvoid functie(ARGSTR() *cmdlin, ARGSTR() *c1) +X{ +X} +END_OF_example.c +if test 3525 -ne `wc -c <example.c`; then + echo shar: \"example.c\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f example.out -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"example.out\" +else +echo shar: Extracting \"example.out\" \(3113 characters\) +sed "s/^X//" >example.out <<'END_OF_example.out' +X# 1 "example.c" +X +X +X +X +X +X +Xtypedef char *charstar; +X +X +X +X +X +X +Xx( +X +X +X +Xbaz) +X# 14 "example.c" +Xstruct { +X struct { +X int (*(*foo)())(); +X } foo; +X} baz; +X# 18 "example.c" +X{/*1*/ +X /* end dcls */return (0); +X}/*1*/ +X +X +X +Xint (*(*bar0)())(); +X +X +X +Xbaz0(bar) +Xint (*(*bar)())(); +X{/*1*/}/*1*/ +X +X +X +X +X +X +X +Xfoo(bar) +X# 38 "example.c" +Xint (*(*bar)())(); +X{/*1*/ +X int (*baz)()= (int (*)()) 0, +X y = (y * (*baz)(y)), +X *(*z)()= (int *(*)()) 0; +X +X struct {/*2*/ int (*foo)(); }/*2*/ *(*s)()= +X (struct { int (*foo)(); } *(*)()) 0; +X +X /* end dcls */{/*2*/ +X y /* end dcls */= (y * (*baz)(y)); +X }/*2*/ +X {/*2*/ +X z /* end dcls */= (int *(*)()) 0; +X }/*2*/ +X {/*2*/ +X s /* end dcls */= (struct { int (*foo)(); } *(*)()) 0; +X }/*2*/ +X +X return (0); +X}/*1*/ +X +X +X +Xtest1() +X{/*1*/ +X int foo2,*(*(*bar)())(),*baz(); +X}/*1*/ +X +X +X +Xtest2(y) +X# 69 "example.c" +Xcharstar y; +X{/*1*/ +X int foo = 5,atoi(); +X +X foo /* end dcls */= 5,atoi(y); +X}/*1*/ +X +X +X +Xtest3,test4(); +X +Xtest5(y) +X# 80 "example.c" +Xint y; +X{/*1*/ +X /* end dcls */{/*2*/ +X test3/* end dcls */; +X }/*2*/ +X {/*2*/ +X test4/* end dcls */(y); +X }/*2*/ +X}/*1*/ +X +Xtest6[1],test7(); +X +Xtest7(x) +X# 92 "example.c" +Xint x; +X{/*1*/ +X /* end dcls */{/*2*/ +X test6/* end dcls */[1]; +X }/*2*/ +X {/*2*/ +X test7/* end dcls */(x); +X }/*2*/ +X}/*1*/ +X +X +X +Xstruct {/*1*/ +X struct {/*2*/ +X int (*f)(), o; +X }/*2*/ bar; +X}/*1*/ (*baz2)()= (struct { struct { int (*f)(), o; } bar; } (*)()) 0; +X +X +X +Xtest8(x) +X{/*1*/ +X /* end dcls */{/*2*/ +X struct {/*3*/ +X int foo; +X }/*3*/ bar(); +X }/*2*/ +X {/*2*/ +X /* end dcls */do {/*3*/ +X int foo; +X }/*3*/ while 
(x); +X }/*2*/ +X}/*1*/ +X +X +X +Xtest9(bar) +X# 128 "example.c" +Xchar *bar; +X{/*1*/ +X foo/* end dcls */(*bar); +X}/*1*/ +X +X +X +Xtest10(x) +X# 135 "example.c" +Xint x; +X{/*1*/ +X /* end dcls */{/*2*/ +X int test10(); +X /* end dcls */do test10(x); +X while (x); +X }/*2*/ +X {/*2*/ +X /* end dcls */return test10(x); +X }/*2*/ +X}/*1*/ +X +Xtest11(x) +X# 147 "example.c" +Xint *x; +X{/*1*/ +X /* end dcls */while (*x) +X (putchar(*x++)); +X}/*1*/ +X +Xtest11a(x) +X# 153 "example.c" +Xint *x; +X{/*1*/ +X /* end dcls */for (*x;;) +X (putchar(*x++)); +X}/*1*/ +X +X +X +Xtest12() +X{/*1*/ +X char *x = +X# 1 "/dev/null" 1 +X# 165 "example.c" 2 +X# 163 "example.c" +X"\017\0002\002\002\002\007bar" +X +X ; +X +X printf/* end dcls */("foobarbaz" ); +X +X *x = '\007'; +X *x = '\377'; +X}/*1*/ +X +Xint test13(); +X +X +X +Xtest14(); +X +Xcharstar test15 = "foobar"; +X +Xchar test16 = "foo\007bar"; +X +X +X +X +Xtest17(foo) +X# 186 "example.c" +Xcharstar foo[bar]; +X# 186 "example.c" +X{/*1*/}/*1*/ +X +Xint (*(*test18[bar])())()= (int (*(*[bar])())()) 0; +X +X +X +X +Xint (*(*test19())())(); +X +X +X +Xvoid test20(test21) +X# 197 "example.c" +Xint test21(); +X# 197 "example.c" +X{/*1*/}/*1*/ +X +Xvoid test22(test23) +X# 199 "example.c" +Xstruct { int foo; } test23(); +X# 199 "example.c" +X{/*1*/}/*1*/ +X +X +X +Xvoid test23() +X{/*1*/ +X int (*test24())(), +X y = (*test24(2)) (3), +X z = ((*test24(2))(3)); +X}/*1*/ +X +X +X +Xint (*(*test25(foo))())() +X# 212 "example.c" +Xlong foo; +X# 212 "example.c" +X{/*1*/ }/*1*/ +X +Xint (*(*test26(foo))())() +Xlong foo; +X{/*1*/ }/*1*/ +X +X +X +Xvoid functie(cmdlin,c1) +X# 220 "example.c" +Xstruct {int l; char c[1];} *cmdlin; +X# 220 "example.c" +Xstruct {int l; char c[1];} *c1; +X{/*1*/ +X}/*1*/ +END_OF_example.out +if test 3113 -ne `wc -c <example.out`; then + echo shar: \"example.out\" unpacked with wrong size! 
+fi +# end of overwriting check +fi +if test -f unproto.1 -a "${1}" != "-c" ; then + echo shar: Will not over-write existing file \"unproto.1\" +else +echo shar: Extracting \"unproto.1\" \(4954 characters\) +sed "s/^X//" >unproto.1 <<'END_OF_unproto.1' +X.TH UNPROTO 1 +X.ad +X.fi +X.SH NAME +Xunproto +X\- +Xcompile ANSI C with traditional UNIX C compiler +X.SH PACKAGE +X.na +X.nf +Xunproto +X.SH SYNOPSIS +X.na +X.nf +X/somewhere/cpp ... +X +Xcc cflags -E file.c | unproto >file.i; cc cflags -c file.i +X.SH DESCRIPTION +X.ad +X.fi +XThis document describes a filter that sits in between the UNIX +XC preprocessor and the next UNIX C compiler stage, on the fly rewriting +XANSI-style syntax to old-style syntax. Typically, the program is +Xinvoked by the native UNIX C compiler as an alternate preprocessor. +XThe unprototyper in turn invokes the native C preprocessor and +Xmassages its output. Similar tricks can be used with the lint(1) +Xcommand. +X +XLanguage constructs that are always rewritten: +X.TP +Xfunction headings, prototypes, pointer types +XANSI-C style function headings, function prototypes, function +Xpointer types and type casts are rewritten to old style. +X<stdarg.h> support is provided for functions with variable-length +Xargument lists. +X.TP +Xcharacter and string constants +XThe \\a and \\x escape sequences are rewritten to their (three-digit) +Xoctal equivalents. +X +XMultiple string tokens are concatenated; an arbitrary number of +Xwhitespace or comment tokens may appear between successive +Xstring tokens. +X +XWithin string constants, octal escape sequences are rewritten to the +Xthree-digit \\ddd form, so that string concatenation produces correct +Xresults. +X.TP +Xdate and time +XThe __DATE__ and __TIME__ tokens are replaced by string constants +Xof the form "Mmm dd yyyy" and "hh:mm:ss", respectively. The result +Xis subjected to string concatenation, just like any other string +Xconstant. 
+X.PP +XLanguage constructs that are rewritten only if the program has been +Xconfigured to do so: +X.TP +Xvoid types +XThe unprototyper can be configured to rewrite "void *" to "char *", +Xand even to rewrite plain "void" to "int". +XThese features are configurable because many traditional UNIX C +Xcompilers do not need them. +X +XNote: (void) argument lists are always replaced by empty ones. +X.PP +XANSI C constructs that are not rewritten because the traditional +XUNIX C preprocessor provides suitable workarounds: +X.TP +Xconst and volatile +XUse the "-Dconst=" and/or "-Dvolatile=" preprocessor directives to +Xget rid of unimplemented keywords. +X.TP +Xtoken pasting and stringizing +XThe traditional UNIX C preprocessor provides excellent alternatives. +XFor example: +X +X.nf +X.ne 2 +X#define string(bar) "bar" /* instead of: # x */ +X#define paste(x,y) x/**\/y /* instead of: x##y */ +X.fi +X +XThere is a good reason why the # and ## operators are not implemented +Xin the unprototyper. +XAfter program text has gone through a non-ANSI C preprocessor, all +Xinformation about the grouping of the operands of # and ## is lost. +XThus, if the unprototyper were to perform these operations, it would +Xproduce correct results only in the most trivial cases. Operands +Xwith embedded blanks, operands that expand to null tokens, and nested +Xuse of # and/or ## would cause all kinds of obscure problems. +X.PP +XUnsupported ANSI features: +X.TP +Xtrigraphs and #pragmas +XTrigraphs are useful only for systems with broken character sets. +XIf the local compiler chokes on #pragma, insert a blank before the +X"#" character, and enclose the offending directive between #ifdef +Xand #endif. +X.SH SEE ALSO +X.na +X.nf +X.ad +X.fi +Xcc(1), how to specify a non-default C preprocessor. +XSome versions of the lint(1) command are implemented as a shell +Xscript. It should require only minor modification for integration +Xwith the unprototyper. 
Other versions of the lint(1) command accept +Xthe same command syntax as the C compiler for the specification of a +Xnon-default preprocessor. Some research may be needed. +X.SH FILES +X.na +X.nf +X/wherever/stdarg.h, provided with the unproto filter. +X.SH DIAGNOSTICS +X.ad +X.fi +XProblems are reported on the standard error stream. +XA non-zero exit status means that there was a problem. +X.SH BUGS +X.ad +X.fi +XThe unprototyper should be run on preprocessed source only: +Xunexpanded macros may confuse the program. +X +XDeclarations of (object) are misunderstood and will result in +Xsyntax errors: the objects between parentheses disappear. +X +XSometimes does not preserve whitespace after parentheses and commas. +XThis is a purely aesthetical matter, and the compiler should not care. +XWhitespace within string constants is, of course, left intact. +X +XDoes not generate explicit type casts for function-argument +Xexpressions. The lack of explicit conversions between integral +Xand/or pointer argument types should not be a problem in environments +Xwhere sizeof(int) == sizeof(long) == sizeof(pointer). A more serious +Xproblem is the lack of automatic type conversions between integral and +Xfloating-point argument types. Let lint(1) be your friend. +X.SH AUTHOR(S) +X.na +X.nf +XWietse Venema (wietse@wzv.win.tue.nl) +XEindhoven University of Technology +XDepartment of Mathematics and Computer Science +XDen Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +X.SH LAST MODIFICATION +X.na +X.nf +X93/06/18 22:29:37 +X.SH VERSION/RELEASE +X.na +X.nf +X1.6 +END_OF_unproto.1 +if test 4954 -ne `wc -c <unproto.1`; then + echo shar: \"unproto.1\" unpacked with wrong size! +fi +# end of overwriting check +fi +echo shar: End of shell archive. 
+exit 0 diff --git a/varargs.c b/varargs.c new file mode 100644 index 0000000..4ca56d8 --- /dev/null +++ b/varargs.c @@ -0,0 +1,32 @@ + /* + * @(#) varargs.c 1.1 91/09/01 23:08:45 + * + * This program can be used to verify that the stdarg.h file is set up + * correctly for your system. If it works, it should print one line with the + * text "stdarg.h works". + * + * main() calls varargs_test() with the format "%s %s\n" and two string + * arguments. varargs_test() walks the format string: every "%s" is replaced + * by the next char * argument (written with fputs), and any other character + * is copied to stdout with putchar. + * + * NOTE(review): strncmp() is used without a declaration (only stdio.h and + * "stdarg.h" are included), and both functions have no declared return type + * and no return statement. This looks deliberate for the pre-ANSI compilers + * the package targets -- confirm before building with a modern compiler. + */ + +#include <stdio.h> +#include "stdarg.h" + +main(int argc, char *argv[]) +{ + varargs_test("%s %s\n", "stdarg.h", "works"); +} + +varargs_test(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + while (*fmt) { + if (strncmp("%s", fmt, 2) == 0) { + fputs(va_arg(ap, char *), stdout); + fmt += 2; + } else { + putchar(*fmt); + fmt++; + } + } + va_end(ap); +} diff --git a/vstring.c b/vstring.c new file mode 100644 index 0000000..220bd53 --- /dev/null +++ b/vstring.c @@ -0,0 +1,122 @@ +/*++ +/* NAME +/* vs_alloc(), VS_ADDCH() +/* SUMMARY +/* auto-resizing string library +/* PACKAGE +/* vstring +/* SYNOPSIS +/* #include "vstring.h" +/* +/* struct vstring *vs_alloc(len) +/* int len; +/* +/* int VS_ADDCH(vs, wp, ch) +/* struct vstring *vs; +/* char *wp; +/* int ch; +/* +/* char *vs_strcpy(vp, dst, src) +/* struct vstring *vp; +/* char *dst; +/* char *src; +/* DESCRIPTION +/* These functions and macros implement a small library for +/* arbitrary-length strings that grow automatically when +/* they fill up. The allocation strategy is such that there +/* will always be place for the terminating null character. +/* +/* vs_alloc() allocates storage for a variable-length string +/* of at least "len" bytes. +/* +/* VS_ADDCH() adds a character to a variable-length string +/* and automagically extends the string if fills up. +/* \fIvs\fP is a pointer to a vstring structure; \fIwp\fP +/* the current write position in the corresponding character +/* array; \fIch\fP the character value to be written. +/* Note that VS_ADDCH() is a macro that evaluates some +/* arguments more than once.
/*
/*	vs_strcpy() appends a null-terminated string to a variable-length
/*	string. \fIsrc\fP provides the data to be copied; \fIvp\fP is the
/*	target, and \fIdst\fP the current write position within the target.
/*	The result is null-terminated. The return value is the new write
/*	position.
/* DIAGNOSTICS
/*	VS_ADDCH() returns zero if it was unable to dynamically
/*	resize a string. In that case the write position \fIwp\fP
/*	has been set to a null pointer.
/*
/*	vs_alloc() returns a null pointer in case of problems.
/*	Nothing stays allocated when it fails.
/*
/*	vs_realloc() returns a null pointer if the string could not
/*	be extended. The vstring structure is left unchanged, so the
/*	data collected so far is still reachable through vp->str.
/*
/*	vs_strcpy() returns a null pointer if the request failed.
/* BUGS
/*	Auto-resizing may change the address of the string data in
/*	a vstring structure. Beware of dangling pointers.
/*
/*	VS_ADDCH() yields the value of the character it stored, so
/*	storing a null character cannot be told apart from failure.
/* AUTHOR(S)
/*	Wietse Venema
/*	Eindhoven University of Technology
/*	Department of Mathematics and Computer Science
/*	Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
/* LAST MODIFICATION
/*	92/01/15 21:53:06
/* VERSION/RELEASE
/*	1.3
/*--*/

static char vstring_sccsid[] = "@(#) vstring.c 1.3 92/01/15 21:53:06";

/* C library */

/*
 * ANSI hosts take the allocator prototypes from <stdlib.h>, so the size
 * argument is converted properly. Hosts without __STDC__ keep the classic
 * unprototyped declarations, because they may not have a <stdlib.h> at all.
 */
#ifdef __STDC__
#include <stdlib.h>
#else
extern char *malloc();
extern char *realloc();
extern void free();
#endif

/* Application-specific stuff */

/*
 * Declarations from vstring.h. They are placed ahead of the functions that
 * use them and protected against double inclusion.
 */
#ifndef VSTRING_H
#define VSTRING_H

/* @(#) vstring.h 1.2 92/01/15 21:53:19 */

struct vstring {
    char   *str;			/* string value */
    char   *last;			/* last position: str + size - 1 */
};

extern struct vstring *vs_alloc();	/* initial allocation */
extern char *vs_realloc();		/* string extension */
extern char *vs_strcpy();		/* copy string */

/*
 * Macro to add one character to auto-resized string. A character is stored
 * only while wp is below vs->last, so the last position always stays free
 * for a terminator. wp must be a modifiable pointer variable; vs and wp are
 * evaluated more than once. The result is the character stored, or zero
 * when the string could not be extended (wp is then a null pointer).
 */

#define VS_ADDCH(vs,wp,c) \
    (((wp) < (vs)->last || ((wp) = vs_realloc((vs), (wp)))) ? \
     (*(wp)++ = (c)) : 0)

#endif					/* VSTRING_H */

/*
 * vs_alloc - initial string allocation
 *
 * Returns a new vstring with room for "len" bytes, or a null pointer when
 * len < 1 or memory is exhausted.
 */

struct vstring *vs_alloc(len)
int     len;
{
    register struct vstring *vp;

    if (len < 1)
	return (0);
    if ((vp = (struct vstring *) malloc(sizeof(struct vstring))) == 0)
	return (0);
    if ((vp->str = malloc(len)) == 0) {
	free((char *) vp);		/* do not leak the descriptor */
	return (0);
    }
    vp->last = vp->str + len - 1;
    return (vp);
}

/*
 * vs_realloc - extend string, update write pointer
 *
 * Doubles the buffer of "vp". "cp" is the caller's write position inside
 * the old buffer; the result is the same position inside the new buffer, or
 * a null pointer when the buffer could not be extended.
 */

char   *vs_realloc(vp, cp)
register struct vstring *vp;
char   *cp;
{
    int     where = cp - vp->str;	/* offset survives the move */
    int     len = vp->last - vp->str + 1;
    char   *grown;

    /*
     * Keep the old buffer until the new one is known to exist: a failed
     * realloc() leaves the old block allocated, and overwriting vp->str
     * with the null result would leak it.
     */
    if ((grown = realloc(vp->str, len *= 2)) == 0)
	return (0);
    vp->str = grown;
    vp->last = vp->str + len - 1;
    return (vp->str + where);
}

/*
 * vs_strcpy - copy string
 *
 * Appends "src" at write position "dst" of "vp", extending the string as
 * needed, and null-terminates the result. Returns the new write position
 * (the address of the terminator), or a null pointer on failure.
 */

char   *vs_strcpy(vp, dst, src)
register struct vstring *vp;
register char *dst;
register char *src;
{
    while (*src) {
	if (VS_ADDCH(vp, dst, *src) == 0)
	    return (0);
	src++;
    }
    *dst = '\0';
    return (dst);
}