diff options
author | Peter Johnson <peter@tortall.net> | 2006-12-24 00:13:19 +0000 |
---|---|---|
committer | Peter Johnson <peter@tortall.net> | 2006-12-24 00:13:19 +0000 |
commit | 36a4a6c3a302de569689c00ff0f87ecf144baa6b (patch) | |
tree | 5b922bb2319f43cd677a7ef5224a4095ff265b97 /tools | |
parent | 6e6580144a5d4a07817f00ea71533cbf94f3a981 (diff) | |
download | yasm-36a4a6c3a302de569689c00ff0f87ecf144baa6b.tar.gz |
Port re2c parser from YACC to recursive descent.
svn path=/trunk/yasm/; revision=1718
Diffstat (limited to 'tools')
-rw-r--r-- | tools/re2c/Makefile.inc | 42 | ||||
-rw-r--r-- | tools/re2c/parser.c | 249 | ||||
-rw-r--r-- | tools/re2c/parser.h | 33 | ||||
-rw-r--r-- | tools/re2c/re2c-parser.y | 181 | ||||
-rw-r--r-- | tools/re2c/scanner.c | 2 | ||||
-rw-r--r-- | tools/re2c/scanner.re | 2 |
6 files changed, 288 insertions, 221 deletions
diff --git a/tools/re2c/Makefile.inc b/tools/re2c/Makefile.inc index 06f47134..abd2db87 100644 --- a/tools/re2c/Makefile.inc +++ b/tools/re2c/Makefile.inc @@ -16,7 +16,8 @@ EXTRA_DIST += tools/re2c/code.c EXTRA_DIST += tools/re2c/dfa.h EXTRA_DIST += tools/re2c/dfa.c EXTRA_DIST += tools/re2c/parse.h -EXTRA_DIST += tools/re2c/re2c-parser.y +EXTRA_DIST += tools/re2c/parser.h +EXTRA_DIST += tools/re2c/parser.c EXTRA_DIST += tools/re2c/actions.c EXTRA_DIST += tools/re2c/scanner.h EXTRA_DIST += tools/re2c/scanner.c @@ -45,8 +46,8 @@ re2c-code.$(OBJEXT): tools/re2c/code.c re2c-dfa.$(OBJEXT): tools/re2c/dfa.c $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/dfa.c || echo '$(srcdir)/'`tools/re2c/dfa.c -re2c-parser.$(OBJEXT): re2c-parser.c - $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f re2c-parser.c || echo '$(srcdir)/'`re2c-parser.c +re2c-parser.$(OBJEXT): tools/re2c/parser.c + $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/parser.c || echo '$(srcdir)/'`tools/re2c/parser.c re2c-actions.$(OBJEXT): tools/re2c/actions.c $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/actions.c || echo '$(srcdir)/'`tools/re2c/actions.c @@ -63,41 +64,6 @@ re2c-substr.$(OBJEXT): tools/re2c/substr.c re2c-translate.$(OBJEXT): tools/re2c/translate.c $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/translate.c || echo '$(srcdir)/'`tools/re2c/translate.c -re2c-parser.c: tools/re2c/re2c-parser.y - $(YACC) $(YFLAGS) $(AM_YFLAGS) `test -f 'tools/re2c/re2c-parser.y' || echo '$(srcdir)/'`tools/re2c/re2c-parser.y - if test -f y.tab.h; then \ - to=`echo "re2c-parser_H" | sed \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ - -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`; \ - sed "/^#/ s/Y_TAB_H/$$to/g" y.tab.h >re2c-parser.ht; \ - rm -f y.tab.h; \ - if cmp -s re2c-parser.ht re2c-parser.h; then \ - rm -f re2c-parser.ht ;\ - else \ - mv re2c-parser.ht re2c-parser.h; \ - fi; \ - fi - if test -f y.output; then \ - mv y.output re2c-parser.output; \ - fi - sed '/^#/ s|y\.tab\.c|re2c-parser.c|' y.tab.c >re2c-parser.ct && mv re2c-parser.ct re2c-parser.c - rm -f y.tab.c - -re2c-parser.h: re2c-parser.c - @if test ! -f $@; then \ - rm -f re2c-parser.c; \ - $(MAKE) re2c-parser.c; \ - else :; fi - -BUILT_SOURCES += re2c-parser.c -BUILT_SOURCES += re2c-parser.h - -CLEANFILES += re2c-parser.c -CLEANFILES += re2c-parser.h - -EXTRA_DIST += re2c-parser.c -EXTRA_DIST += re2c-parser.h - EXTRA_DIST += tools/re2c/CHANGELOG EXTRA_DIST += tools/re2c/NO_WARRANTY EXTRA_DIST += tools/re2c/README diff --git a/tools/re2c/parser.c b/tools/re2c/parser.c new file mode 100644 index 00000000..02d5c668 --- /dev/null +++ b/tools/re2c/parser.c @@ -0,0 +1,249 @@ +#include <time.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "tools/re2c/globals.h" +#include "tools/re2c/parse.h" +#include "tools/re2c/parser.h" + +int yylex(void); +static RegExp *parse_expr(void); +static RegExp *parse_diff(void); +static RegExp *parse_term(void); +static RegExp *parse_factor(void); +static RegExp *parse_primary(void); + +static unsigned int accept; +static RegExp *spec; +static Scanner *in; + +static int curtok, peektok; +yystype yylval; +static yystype peekval; + +#define get_next_token() (curtok = yylex()) + +static void +get_peek_token(void) +{ + yystype temp = yylval; /* structure copy */ + if (peektok != NONE) + Scanner_fatal(in, "more than one token of lookahead?"); + peektok = yylex(); + peekval = yylval; /* structure copy */ + yylval = temp; +} + +static void +yyparse(void) +{ + RegExp *re, *look; + + accept = 0; + spec = NULL; + get_next_token(); + while (curtok != 0) { + switch (curtok) { + case ID: + get_peek_token(); + if (peektok == '=') { + /* ID = expr; */ + Symbol *sym = yylval.symbol; + get_next_token(); /* id */ + get_next_token(); /* = */ + re = parse_expr(); + if (curtok != ';') + Scanner_fatal(in, "missing `;' after regexp"); + get_next_token(); /* ; */ + if (sym->re) + Scanner_fatal(in, "sym already defined"); + sym->re = re; + break; + } + /*@fallthrough@*/ + default: + /* rule: expr [/ expr] CODE */ + re = parse_expr(); + if (!re) + Scanner_fatal(in, "expression syntax error"); + + if (curtok == '/') { + get_next_token(); /* / */ + look = parse_expr(); + } else + look = RegExp_new_NullOp(); + + if (curtok != CODE) + Scanner_fatal(in, "missing code after regexp"); + re = RegExp_new_RuleOp(re, look, yylval.token, accept++); + get_next_token(); /* CODE */ + spec = spec ? mkAlt(spec, re) : re; + } + } +} + +static RegExp * +parse_expr(void) +{ + RegExp *e, *f; + e = parse_diff(); + while (curtok == '|') { + get_next_token(); /* | */ + f = parse_diff(); + e = mkAlt(e, f); + } + return e; +} + +static RegExp * +parse_diff(void) +{ + RegExp *e, *f; + e = parse_term(); + while (curtok == '\\') { + get_next_token(); /* \ */ + f = parse_term(); + e = mkDiff(e, f); + if(!e) + Scanner_fatal(in, "can only difference char sets"); + } + return e; +} + +static RegExp * +parse_term(void) +{ + RegExp *e, *f; + e = parse_factor(); + while ((f = parse_factor())) { + e = RegExp_new_CatOp(e, f); + } + return e; +} + +static RegExp * +parse_factor(void) +{ + RegExp *e; + char ch; + e = parse_primary(); + while (curtok == CLOSE || curtok == CLOSESIZE) { + switch (curtok) { + case CLOSE: + ch = yylval.op; + while (get_next_token() == CLOSE) { + if (ch != yylval.op) + ch = '*'; + } + switch (ch) { + case '*': + e = mkAlt(RegExp_new_CloseOp(e), RegExp_new_NullOp()); + break; + case '+': + e = RegExp_new_CloseOp(e); + break; + case '?': + e = mkAlt(e, RegExp_new_NullOp()); + break; + } + break; + case CLOSESIZE: + e = RegExp_new_CloseVOp(e, yylval.extop.minsize, + yylval.extop.maxsize); + get_next_token(); /* CLOSESIZE */ + break; + default: + Scanner_fatal(in, "parse error"); + break; + } + } + return e; +} + +static RegExp * +parse_primary(void) +{ + RegExp *e; + switch (curtok) { + case ID: + if (!yylval.symbol->re) + Scanner_fatal(in, "can't find symbol"); + e = yylval.symbol->re; + get_next_token(); + break; + case RANGE: + case STRING: + e = yylval.regexp; + get_next_token(); + break; + case '(': + get_next_token(); + e = parse_expr(); + if (curtok != ')') + Scanner_fatal(in, "missing closing parenthesis"); + get_next_token(); + break; + default: + return NULL; + } + return e; +} + +int +yylex(void) +{ + if (peektok != NONE) { + int tok = peektok; + yylval = peekval; + peektok = NONE; + return tok; + } + return Scanner_scan(in); +} + +void line_source(FILE *o, unsigned int line) +{ + char * fnamebuf; + char * token; + + if (iFlag) + return; + fprintf(o, "#line %u \"", line); + if( fileName != NULL ) { + fnamebuf = mystrdup( fileName ); + } else { + fnamebuf = mystrdup( "<stdin>" ); + } + token = strtok( fnamebuf, "\\" ); + for(;;) { + fprintf(o, "%s", token); + token = strtok( NULL, "\\" ); + if( token == NULL ) break; + fputs("\\\\", o); + } + fputs("\"\n", o); oline++; + free( fnamebuf ); +} + +void parse(FILE *i, FILE *o){ + time_t now; + + time(&now); + + peektok = NONE; + + fputs("/* Generated by re2c 0.9.1-C on ", o); + fprintf(o, "%-24s", ctime(&now)); + fputs(" */\n", o); oline+=2; + + in = Scanner_new(i); + + line_source(o, Scanner_line(in)); + + while(Scanner_echo(in, o)){ + yyparse(); + if(spec) + genCode(o, spec); + line_source(o, Scanner_line(in)); + } +} diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h new file mode 100644 index 00000000..c433a99d --- /dev/null +++ b/tools/re2c/parser.h @@ -0,0 +1,33 @@ +#ifndef RE2C_PARSER_H +#define RE2C_PARSER_H + +/* Tokens */ +enum yytokentype { + CLOSESIZE = 258, + CLOSE = 259, + ID = 260, + CODE = 261, + RANGE = 262, + STRING = 263, + NONE = 264 +}; + +#define CLOSESIZE 258 +#define CLOSE 259 +#define ID 260 +#define CODE 261 +#define RANGE 262 +#define STRING 263 +#define NONE 264 + +typedef union { + Symbol *symbol; + RegExp *regexp; + Token *token; + char op; + ExtOp extop; +} yystype; + +extern yystype yylval; + +#endif diff --git a/tools/re2c/re2c-parser.y b/tools/re2c/re2c-parser.y deleted file mode 100644 index 998cdb05..00000000 --- a/tools/re2c/re2c-parser.y +++ /dev/null @@ -1,181 +0,0 @@ -%{ -#include <time.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include "tools/re2c/globals.h" -#include "tools/re2c/parse.h" - -/* Work around bug in Bison 2.1 */ -#define YYPARSE_PARAM unused - -int yylex(void); -void yyerror(const char*); - -static unsigned int accept; -static RegExp *spec; -static Scanner *in; - -%} - -%start spec - -%union { - Symbol *symbol; - RegExp *regexp; - Token *token; - char op; - ExtOp extop; -} - -%token CLOSESIZE CLOSE ID CODE RANGE STRING - -%type <op> CLOSE -%type <op> close -%type <extop> CLOSESIZE -%type <symbol> ID -%type <token> CODE -%type <regexp> RANGE STRING -%type <regexp> rule look expr diff term factor primary - -%% - -spec : - { accept = 0; - spec = NULL; } - | spec rule - { spec = spec? mkAlt(spec, $2) : $2; } - | spec decl - ; - -decl : ID '=' expr ';' - { if($1->re) - Scanner_fatal(in, "sym already defined"); - $1->re = $3; } - ; - -rule : expr look CODE - { $$ = RegExp_new_RuleOp($1, $2, $3, accept++); } - ; - -look : - { $$ = RegExp_new_NullOp(); } - | '/' expr - { $$ = $2; } - ; - -expr : diff - { $$ = $1; } - | expr '|' diff - { $$ = mkAlt($1, $3); } - ; - -diff : term - { $$ = $1; } - | diff '\\' term - { $$ = mkDiff($1, $3); - if(!$$) - Scanner_fatal(in, "can only difference char sets"); - } - ; - -term : factor - { $$ = $1; } - | term factor - { $$ = RegExp_new_CatOp($1, $2); } - ; - -factor : primary - { $$ = $1; } - | primary close - { - switch($2){ - case '*': - $$ = mkAlt(RegExp_new_CloseOp($1), RegExp_new_NullOp()); - break; - case '+': - $$ = RegExp_new_CloseOp($1); - break; - case '?': - $$ = mkAlt($1, RegExp_new_NullOp()); - break; - } - } - | primary CLOSESIZE - { - $$ = RegExp_new_CloseVOp($1, $2.minsize, $2.maxsize); - } - ; - -close : CLOSE - { $$ = $1; } - | close CLOSE - { $$ = ($1 == $2) ? $1 : '*'; } - ; - -primary : ID - { if(!$1->re) - Scanner_fatal(in, "can't find symbol"); - $$ = $1->re; } - | RANGE - { $$ = $1; } - | STRING - { $$ = $1; } - | '(' expr ')' - { $$ = $2; } - ; - -%% - -void yyerror(const char* s){ - Scanner_fatal(in, s); -} - -int yylex(){ - return Scanner_scan(in); -} - -void line_source(FILE *o, unsigned int line) -{ - char * fnamebuf; - char * token; - - if (iFlag) - return; - fprintf(o, "#line %u \"", line); - if( fileName != NULL ) { - fnamebuf = mystrdup( fileName ); - } else { - fnamebuf = mystrdup( "<stdin>" ); - } - token = strtok( fnamebuf, "\\" ); - for(;;) { - fprintf(o, "%s", token); - token = strtok( NULL, "\\" ); - if( token == NULL ) break; - fputs("\\\\", o); - } - fputs("\"\n", o); oline++; - free( fnamebuf ); -} - -void parse(FILE *i, FILE *o){ - time_t now; - - time(&now); - - fputs("/* Generated by re2c 0.9.1-C on ", o); - fprintf(o, "%-24s", ctime(&now)); - fputs(" */\n", o); oline+=2; - - in = Scanner_new(i); - - line_source(o, Scanner_line(in)); - - while(Scanner_echo(in, o)){ - yyparse(NULL); - if(spec) - genCode(o, spec); - line_source(o, Scanner_line(in)); - } -} diff --git a/tools/re2c/scanner.c b/tools/re2c/scanner.c index fd0ca93e..4640ee4a 100644 --- a/tools/re2c/scanner.c +++ b/tools/re2c/scanner.c @@ -6,7 +6,7 @@ #include "tools/re2c/scanner.h" #include "tools/re2c/parse.h" #include "tools/re2c/globals.h" -#include "re2c-parser.h" +#include "tools/re2c/parser.h" #ifndef MAX #define MAX(a,b) (((a)>(b))?(a):(b)) diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re index 81dcf48b..10af0883 100644 --- a/tools/re2c/scanner.re +++ b/tools/re2c/scanner.re @@ -3,7 +3,7 @@ #include "tools/re2c/scanner.h" #include "tools/re2c/parse.h" #include "tools/re2c/globals.h" -#include "re2c-parser.h" +#include "tools/re2c/parser.h" #ifndef MAX #define MAX(a,b) (((a)>(b))?(a):(b)) |