diff options
author | Robert de Bath <rdebath@poboxes.com> | 1996-03-24 17:45:55 +0100 |
---|---|---|
committer | Lubomir Rintel <lkundrak@v3.sk> | 2013-10-23 23:29:43 +0200 |
commit | fe22c37817ce338fbbc90b239320248c270957fa (patch) | |
tree | d9550410c4a20bdd382fcc58d2d3d7c5e04e5245 /unproto/unproto.c | |
parent | a7aba15e8efffb1c5d3097656f1a93955a64f01f (diff) | |
parent | 42192453ea219b80d0bf9f41e51e36d3d4d0740b (diff) | |
download | dev86-fe22c37817ce338fbbc90b239320248c270957fa.tar.gz |
Import Dev86-0.0.4.tar.gz (tag: v0.0.4)
Diffstat (limited to 'unproto/unproto.c')
-rw-r--r-- | unproto/unproto.c | 749 |
1 files changed, 749 insertions, 0 deletions
diff --git a/unproto/unproto.c b/unproto/unproto.c new file mode 100644 index 0000000..9ca26ce --- /dev/null +++ b/unproto/unproto.c @@ -0,0 +1,749 @@ +/*++ +/* NAME +/* unproto 1 +/* SUMMARY +/* ANSI C to old C converter +/* PACKAGE +/* unproto +/* SYNOPSIS +/* /lib/cpp ... | unproto +/* +/* /somewhere/cpp ... +/* DESCRIPTION +/* This document describes a filter that sits between the +/* C preprocessor (usually \fI/lib/cpp\fP) and the next C compiler +/* pass. It rewrites ANSI-C style function headers, function type +/* declarations, function pointer types, and function pointer casts +/* to old style. Other ANSI-isms are passed on without modification +/* (token pasting, pragmas, etcetera). +/* +/* For maximal flexibility, the "cpp | unproto" pipeline can be +/* packaged as an executable shell script named "/somewhere/cpp". +/* This script should then be specified to the C compiler as a +/* non-default preprocessor. It will not work if your C compiler +/* specifies output file names to the preprocessor. +/* +/* The overhead of shell script interpretation can be avoided by +/* having the unprototyper itself open the pipe to the preprocessor. +/* In that case, the source should be compiled with the PIPE_THROUGH_CPP +/* macro defined (usually as "/lib/cpp"), and the resulting binary +/* should be installed as "/somewhere/cpp". +/* SEE ALSO +/* .ad +/* .fi +/* cc(1), how to specify a non-default C preprocessor. +/* +/* Some versions of the lint command are implemented as a shell +/* script. It should require only minor modification for integration +/* with the unprotoizer. Other versions of the lint command accept the same +/* command syntax as the C compiler for the specification of a non-default +/* preprocessor. Some research may be needed. +/* DIAGNOSTICS +/* The progam will complain if it unexpectedly +/* reaches the end of input. +/* BUGS +/* Should be run on preprocessed source only, i.e. after macro expansion. 
+/* +/* Declarations of (whatever) are misunderstood and will result in +/* syntax errors. +/* +/* Does not generate explicit type casts for function argument +/* expressions. +/* AUTHOR(S) +/* Wietse Venema (wietse@wzv.win.tue.nl) +/* Eindhoven University of Technology +/* Department of Mathematics and Computer Science +/* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands +/* LAST MODIFICATION +/* 91/09/22 21:21:35 +/* VERSION/RELEASE +/* 1.2 +/*--*/ + +static char unproto_sccsid[] = "@(#) unproto.c 1.3 91/11/30 21:10:30"; + +/* C library */ + +#include <stdio.h> +#include <errno.h> + +extern void exit(); +extern int optind; +extern char *optarg; +extern int getopt(); + +/* Application-specific stuff */ + +#include "vstring.h" +#include "stdarg.h" +#include "token.h" +#include "error.h" +#include "symbol.h" + +/* Forward declarations. */ + +static struct token *dcl_flush(); +static void block_flush(); +static void block_dcls(); +static struct token *show_func_ptr_type(); +static struct token *show_struct_type(); +static void show_arg_name(); +static void show_type(); +static void pair_flush(); +static void check_cast(); + +#define check_cast_flush(t) (check_cast(t), tok_free(t)) + +#ifdef PIPE_THROUGH_CPP +static int pipe_stdin_through_cpp(); +#endif + +/* Disable debugging printfs while preserving side effects. */ + +#ifdef DEBUG +#define DPRINTF printf +#else +#define DPRINTF (void) +#endif + +/* An attempt to make some complicated expressions a bit more readable. 
*/ + +#define STREQ(x,y) (*(x) == *(y) && !strcmp((x),(y))) + +#define LAST_ARG_AND_EQUAL(s,c) ((s)->next == 0 && (s)->head \ + && ((s)->head == (s)->tail) \ + && (STREQ((s)->head->vstr->str, (c)))) + +#define LIST_BEGINS_WITH_STAR(s) (s->head->head && s->head->head->tokno == '*') + +#define IS_FUNC_PTR_TYPE(s) (s->tokno == TOK_LIST && s->next \ + && s->next->tokno == TOK_LIST \ + && LIST_BEGINS_WITH_STAR(s)) + +/* main - driver */ + +int main(argc, argv) +int argc; +char **argv; +{ + register struct token *t; +#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ + int cpp_status; + int wait_pid; + int cpp_pid; + + cpp_pid = pipe_stdin_through_cpp(argv); +#else + if ( argc > 1 ) { + if( freopen(argv[1], "r", stdin) == 0 ) { + fprintf(stderr, "Cannot open '%s'\n", argv[1]); + exit(9); + } + } + if ( argc > 2 ) { + if( freopen(argv[2], "w", stdout) == 0 ) { + fprintf(stderr, "Cannot open '%s'\n", argv[2]); + exit(9); + } + } +#endif + + sym_init(); /* prime the symbol table */ + + while (t = tok_class(DO_WSPACE)) { + if (t = dcl_flush(t)) { /* try declaration */ + if (t->tokno == '{') { /* examine rejected token */ + block_flush(t); /* body */ + } else { + tok_flush(t); /* other, recover */ + } + } + } + +#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ + while ((wait_pid = wait(&cpp_status)) != -1 && wait_pid != cpp_pid) + /* void */ ; + return (wait_pid != cpp_pid || cpp_status != 0); +#else + return (0); +#endif +} + +#ifdef PIPE_THROUGH_CPP /* pipe through /lib/cpp */ + +/* pipe_stdin_through_cpp - avoid shell script overhead */ + +static int pipe_stdin_through_cpp(argv) +char **argv; +{ + int pipefds[2]; + int pid; + char **cpptr = argv; + + /* + * With most UNIX implementations, the second non-option argument to + * /lib/cpp specifies the output file. If an output file other than + * stdout is specified, we must force /lib/cpp to write to stdout, and we + * must redirect our own standard output to the specified output file. 
+ */ + +#define IS_OPTION(cp) ((cp)[0] == '-' && (cp)[1] != 0) + + /* Skip to first non-option argument, if any. */ + + while (*++cpptr && IS_OPTION(*cpptr)) + /* void */ ; + + /* + * Assume that the first non-option argument is the input file name. The + * next argument could be the output destination or an option (System V + * Release 2 /lib/cpp gets the options *after* the file arguments). + */ + + if (*cpptr && *++cpptr && **cpptr != '-') { + + /* + * The first non-option argument is followed by another argument that + * is not an option ("-stuff") or a hyphen ("-"). Redirect our own + * standard output before we clobber the file name. + */ + + if (freopen(*cpptr, "w", stdout) == 0) { + perror(*cpptr); + exit(1); + } + /* Clobber the file name argument so that /lib/cpp writes to stdout */ + + *cpptr = "-"; + } + /* Set up the pipe that connects /lib/cpp to our standard input. */ + + if (pipe(pipefds)) { + perror("pipe"); + exit(1); + } + switch (pid = fork()) { + case -1: /* error */ + perror("fork"); + exit(1); + case 0: /* child */ + close(pipefds[0]); /* close reading end */ + close(1); /* connect stdout to pipe */ + if (dup(pipefds[1]) != 1) + error(1, "dup() problem"); + close(pipefds[1]); /* close redundant fd */ + execv(PIPE_THROUGH_CPP, argv); + perror(PIPE_THROUGH_CPP); + exit(1); + default: /* parent */ + close(pipefds[1]); /* close writing end */ + close(0); /* connect stdin to pipe */ + if (dup(pipefds[0]) != 0) + error(1, "dup() problem"); + close(pipefds[0]); /* close redundant fd */ + return (pid); + } +} + +#endif + +/* header_flush - rewrite new-style function header to old style */ + +static void header_flush(t) +register struct token *t; +{ + register struct token *s; + + /* Do argument names, but suppress void and rewrite trailing ... */ + + if (LAST_ARG_AND_EQUAL(t->head, "void")) { + put_str("()\n"); /* no arguments */ + } else { + for (s = t->head; s; s = s->next) { /* foreach argument... 
*/ + if (LAST_ARG_AND_EQUAL(s, "...")) { +#ifdef _VA_ALIST_ /* see ./stdarg.h */ + put_ch(s->tokno); /* ',' */ + put_str(_VA_ALIST_); /* varargs magic */ +#endif + } else { + put_ch(s->tokno); /* opening '(' or ',' */ + show_arg_name(s); /* extract argument name */ + } + } + put_str(")\n"); /* closing ')' */ + } + + /* Do argument types, but suppress void and trailing ... */ + + if (!LAST_ARG_AND_EQUAL(t->head, "void")) { + for (s = t->head; s; s = s->next) { /* foreach argument... */ + if (!LAST_ARG_AND_EQUAL(s, "...")) { + if (s->head != s->tail) { /* really new-style argument? */ + show_line_control(); /* fix line number */ + show_type(s); /* rewrite type info */ + put_str(";\n"); + } + } + } + } + tok_free(t); + show_line_control(); /* because '{' follows */ +} + +/* show_arg_name - extract argument name from argument type info */ + +static void show_arg_name(s) +register struct token *s; +{ + if (s->head) { + register struct token *p; + register struct token *t = 0; + + /* Find the last interesting item. */ + + for (p = s->head; p; p = p->next) { + if (p->tokno == TOK_WORD) { + t = p; /* remember last word */ + } else if (IS_FUNC_PTR_TYPE(p)) { + t = p; /* or function pointer */ + p = p->next; + } + } + + /* Extract argument name from last interesting item. */ + + if (t) { + if (t->tokno == TOK_LIST) + show_arg_name(t->head); /* function pointer, recurse */ + else + tok_show(t); /* print last word */ + } + } +} + +/* show_type - rewrite type to old-style syntax */ + +static void show_type(s) +register struct token *s; +{ + register struct token *p; + + for (p = s->head; p; p = p->next) { + if (IS_FUNC_PTR_TYPE(p)) { + p = show_func_ptr_type(p); /* function pointer type */ + } else { + tok_show(p); /* other */ + } + } +} + +/* show_func_ptr_type - display function_pointer type using old-style syntax */ + +static struct token *show_func_ptr_type(t) +struct token *t; +{ + register struct token *s; + + /* + * Rewrite (list1) (list2) to (list1) (). 
Only (list1) is given to us; + * the caller must have verified the presence of (list2). Account for the + * rare case that (list1) is a comma-separated list. That should be an + * error, but we do not want to waste any information. + */ + + for (s = t->head; s; s = s->next) { + put_ch(s->tokno); /* opening paren or ',' */ + show_type(s); /* recurse */ + } + put_str(")()"); /* closing paren */ + return (t->next); +} + +/* show_struct_type - display structured type, rewrite function-pointer types */ + +static struct token *show_struct_type(p) +register struct token *p; +{ + tok_show(p); /* opening brace */ + + while (p->next) { /* XXX cannot return 0 */ + p = p->next; + if (IS_FUNC_PTR_TYPE(p)) { + p = show_func_ptr_type(p); /* function-pointer member */ + } else if (p->tokno == '{') { + p = show_struct_type(p); /* recurse */ + } else { + tok_show(p); /* other */ + if (p->tokno == '}') { + return (p); /* done */ + } + } + } + DPRINTF("/* missing '}' */"); + return (p); +} + +/* is_func_ptr_cast - recognize function-pointer type cast */ + +static int is_func_ptr_cast(t) +register struct token *t; +{ + register struct token *p; + + /* + * Examine superficial structure. Require (list1) (list2). Require that + * list1 begins with a star. + */ + + if (!IS_FUNC_PTR_TYPE(t)) + return (0); + + /* + * Make sure that there is no name in (list1). Do not worry about + * unexpected tokens, because the compiler will complain anyway. + */ + + for (p = t->head->head; p; p = p->next) { + switch (p->tokno) { + case TOK_LIST: /* recurse */ + return (is_func_ptr_cast(p)); + case TOK_WORD: /* name in list */ + return (0); + } + } + return (1); /* no name found */ +} + +/* check_cast - display ()-delimited, comma-separated list */ + +static void check_cast(t) +struct token *t; +{ + register struct token *s; + register struct token *p; + + /* + * Rewrite function-pointer types and function-pointer casts. Do not + * blindly rewrite (*list1)(list2) to (*list1)(). 
Function argument lists + * are about the only thing we can discard without provoking diagnostics + * from the compiler. + */ + + for (s = t->head; s; s = s->next) { + put_ch(s->tokno); /* opening paren or ',' */ + for (p = s->head; p; p = p->next) { + switch (p->tokno) { + case TOK_LIST: + if (is_func_ptr_cast(p)) { /* not: IS_FUNC_PTR_TYPE(p) */ + p = show_func_ptr_type(p); /* or we might take away */ + } else { /* function-call arguments */ + check_cast(p); /* recurse */ + } + break; + case '{': + p = show_struct_type(p); /* rewrite func. ptr. types */ + break; + default: + tok_show(p); + break; + } + } + } + put_ch(')'); /* closing paren */ +} + +/* block_dcls - on the fly rewrite decls/initializers at start of block */ + +static void block_dcls() +{ + register struct token *t; + + /* + * Away from the top level, a declaration should be preceded by type or + * storage-class information. That is why inside blocks, structs and + * unions we insist on reading one word before passing the _next_ token + * to the dcl_flush() function. + * + * Struct and union declarations look the same everywhere: we make an + * exception for these more regular constructs and pass the "struct" and + * "union" tokens to the type_dcl() function. + */ + + while (t = tok_class(DO_WSPACE)) { + switch (t->tokno) { + case TOK_WSPACE: /* preserve white space */ + case '\n': /* preserve line count */ + tok_flush(t); + break; + case TOK_WORD: /* type declarations? 
*/ + tok_flush(t); /* advance to next token */ + t = tok_class(DO_WSPACE); /* null return is ok */ + case TOK_COMPOSITE: /* struct or union */ + if ((t = dcl_flush(t)) == 0) + break; + /* FALLTRHOUGH */ + default: /* end of declarations */ + DPRINTF("/* end dcls */"); + /* FALLTRHOUGH */ + case '}': /* end of block */ + tok_unget(t); + return; + } + } +} + +/* block_flush - rewrite struct, union or statement block on the fly */ + +static void block_flush(t) +register struct token *t; +{ + static int count = 0; + + tok_flush(t); + DPRINTF("/*%d*/", ++count); + + /* + * Rewrite function pointer types in declarations and function pointer + * casts in initializers at start of block. + */ + + block_dcls(); + + /* Remainder of block: only rewrite function pointer casts. */ + + while (t = tok_class(DO_WSPACE)) { + if (t->tokno == TOK_LIST) { + check_cast_flush(t); + } else if (t->tokno == '{') { + block_flush(t); + } else { + tok_flush(t); + if (t->tokno == '}') { + DPRINTF("/*%d*/", count--); + return; + } + } + } + DPRINTF("/* missing '}' */"); +} + +/* pair_flush - on the fly rewrite casts in grouped stuff */ + +static void pair_flush(t, start, stop) +register struct token *t; +register int start; +register int stop; +{ + tok_flush(t); + + while (t = tok_class(DO_WSPACE)) { + if (t->tokno == start) { /* recurse */ + pair_flush(t, start, stop); + } else if (t->tokno == TOK_LIST) { /* expression or cast */ + check_cast_flush(t); + } else { /* other, copy */ + tok_flush(t); + if (t->tokno == stop) { /* done */ + return; + } + } + } + DPRINTF("/* missing '%c' */", stop); +} + +/* initializer - on the fly rewrite casts in initializer */ + +static void initializer() +{ + register struct token *t; + + while (t = tok_class(DO_WSPACE)) { + switch (t->tokno) { + case ',': /* list separator */ + case ';': /* list terminator */ + tok_unget(t); + return; + case TOK_LIST: /* expression or cast */ + check_cast_flush(t); + break; + case '[': /* array substript, may nest */ + 
pair_flush(t, '[', ']'); + break; + case '{': /* structured data, may nest */ + pair_flush(t, '{', '}'); + break; + default: /* other, just copy */ + tok_flush(t); + break; + } + } +} + +/* func_ptr_dcl_flush - rewrite function pointer declaration */ + +static struct token *func_ptr_dcl_flush(list) +register struct token *list; +{ + register struct token *t; + + /* + * Ignore blanks because they would be output earlier than the list that + * preceded them... Recover gracefully from syntax errors. + */ + + while (t = tok_class(NO_WSPACE)) { + switch (t->tokno) { + case '\n': /* preserve line count */ + tok_flush(t); + break; + case TOK_LIST: + /* Function pointer type: (list1) (list2) -> (list1) () */ + (void) show_func_ptr_type(list); /* may be recursive */ + tok_free(list); + tok_free(t); + return (0); + default: /* not a declaration */ + tok_unget(t); + return (list); + } + } + + /* Hit EOF; must be mistake, but do not waste any information. */ + + return (list); +} + +/* function_dcl_flush - rewrite function { heading, type declaration } */ + +static struct token *function_dcl_flush(list) +register struct token *list; +{ + register struct token *t; + + /* + * Ignore blanks because they would be output earlier than the list that + * preceded them... + */ + + while (t = tok_class(NO_WSPACE)) { + switch (t->tokno) { + case '\n': + /* Preserve line count */ + tok_flush(t); + break; + case '{': + /* Function heading: word (list) { -> old style heading */ + header_flush(list); + tok_unget(t); + return (0); + case TOK_WORD: + /* Old-style function heading: word (list) word...{ */ + tok_flush(list); + tok_unget(t); + return (0); + case TOK_LIST: + /* Function typedef? word (list1) (list) -> word (list1) () */ + tok_flush(list); + put_str("()"); + tok_free(t); + return (0); + case ',': + case ';': + /* Function type declaration: word (list) -> word () */ + tok_free(list); + put_str("()"); + tok_unget(t); + return (0); + default: + /* Something else, reject the list. 
*/ + tok_unget(t); + return (list); + } + } + + /* Hit EOF; must be mistake, but do not waste any information. */ + + return (list); +} + +/* dcl_flush - parse declaration on the fly, return rejected token */ + +static struct token *dcl_flush(t) +register struct token *t; +{ + register int got_word; + + /* + * Away from the top level, type or storage-class information is required + * for an (extern or forward) function type declaration or a variable + * declaration. + * + * With our naive word-counting approach, this means that the caller should + * read one word before passing the next token to us. This is how we + * distinguish, for example, function declarations from function calls. + * + * An exception are structs and unions, because they look the same at any + * level. The caller should give is the "struct" or "union" token. + */ + + for (got_word = 0; t; t = tok_class(DO_WSPACE)) { + switch (t->tokno) { + case TOK_WSPACE: /* advance past blanks */ + case '\n': /* advance past newline */ + case '*': /* indirection: keep trying */ + tok_flush(t); + break; + case TOK_WORD: /* word: keep trying */ + case TOK_COMPOSITE: /* struct or union */ + got_word = 1; + tok_flush(t); + break; + default: + + /* + * Function pointer types can be preceded by zero or more words + * (at least one when not at the top level). Other stuff can be + * accepted only after we have seen at least one word (two words + * when not at the top level). See also the above comment on + * structs and unions. 
+ */ + + if (t->tokno == TOK_LIST && LIST_BEGINS_WITH_STAR(t)) { + if (t = func_ptr_dcl_flush(t)) { + return (t); /* reject token */ + } else { + got_word = 1; /* for = and [ and , and ; */ + } + } else if (got_word == 0) { + return (t); /* reject token */ + } else { + switch (t->tokno) { + case TOK_LIST: /* function type */ + if (t = function_dcl_flush(t)) + return (t); /* reject token */ + break; + case '[': /* dimension, does not nest */ + pair_flush(t, '[', ']'); + break; + case '=': /* initializer follows */ + tok_flush(t); + initializer(); /* rewrite casts */ + break; + case '{': /* struct, union, may nest */ + block_flush(t); /* use code for stmt blocks */ + break; + case ',': /* separator: keep trying */ + got_word = 0; + tok_flush(t); + break; + case ';': /* terminator: succeed */ + tok_flush(t); + return (0); + default: /* reject token */ + return (t); + } + } + } + } + return (0); /* hit EOF */ +} |