summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
author: Peter Johnson <peter@tortall.net> 2006-12-24 00:13:19 +0000
committer: Peter Johnson <peter@tortall.net> 2006-12-24 00:13:19 +0000
commit: 36a4a6c3a302de569689c00ff0f87ecf144baa6b (patch)
tree: 5b922bb2319f43cd677a7ef5224a4095ff265b97 /tools
parent: 6e6580144a5d4a07817f00ea71533cbf94f3a981 (diff)
download: yasm-36a4a6c3a302de569689c00ff0f87ecf144baa6b.tar.gz
Port re2c parser from YACC to recursive descent.
svn path=/trunk/yasm/; revision=1718
Diffstat (limited to 'tools')
-rw-r--r-- tools/re2c/Makefile.inc 42
-rw-r--r-- tools/re2c/parser.c 249
-rw-r--r-- tools/re2c/parser.h 33
-rw-r--r-- tools/re2c/re2c-parser.y 181
-rw-r--r-- tools/re2c/scanner.c 2
-rw-r--r-- tools/re2c/scanner.re 2
6 files changed, 288 insertions, 221 deletions
diff --git a/tools/re2c/Makefile.inc b/tools/re2c/Makefile.inc
index 06f47134..abd2db87 100644
--- a/tools/re2c/Makefile.inc
+++ b/tools/re2c/Makefile.inc
@@ -16,7 +16,8 @@ EXTRA_DIST += tools/re2c/code.c
EXTRA_DIST += tools/re2c/dfa.h
EXTRA_DIST += tools/re2c/dfa.c
EXTRA_DIST += tools/re2c/parse.h
-EXTRA_DIST += tools/re2c/re2c-parser.y
+EXTRA_DIST += tools/re2c/parser.h
+EXTRA_DIST += tools/re2c/parser.c
EXTRA_DIST += tools/re2c/actions.c
EXTRA_DIST += tools/re2c/scanner.h
EXTRA_DIST += tools/re2c/scanner.c
@@ -45,8 +46,8 @@ re2c-code.$(OBJEXT): tools/re2c/code.c
re2c-dfa.$(OBJEXT): tools/re2c/dfa.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/dfa.c || echo '$(srcdir)/'`tools/re2c/dfa.c
-re2c-parser.$(OBJEXT): re2c-parser.c
- $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f re2c-parser.c || echo '$(srcdir)/'`re2c-parser.c
+re2c-parser.$(OBJEXT): tools/re2c/parser.c
+ $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/parser.c || echo '$(srcdir)/'`tools/re2c/parser.c
re2c-actions.$(OBJEXT): tools/re2c/actions.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/actions.c || echo '$(srcdir)/'`tools/re2c/actions.c
@@ -63,41 +64,6 @@ re2c-substr.$(OBJEXT): tools/re2c/substr.c
re2c-translate.$(OBJEXT): tools/re2c/translate.c
$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/translate.c || echo '$(srcdir)/'`tools/re2c/translate.c
-re2c-parser.c: tools/re2c/re2c-parser.y
- $(YACC) $(YFLAGS) $(AM_YFLAGS) `test -f 'tools/re2c/re2c-parser.y' || echo '$(srcdir)/'`tools/re2c/re2c-parser.y
- if test -f y.tab.h; then \
- to=`echo "re2c-parser_H" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`; \
- sed "/^#/ s/Y_TAB_H/$$to/g" y.tab.h >re2c-parser.ht; \
- rm -f y.tab.h; \
- if cmp -s re2c-parser.ht re2c-parser.h; then \
- rm -f re2c-parser.ht ;\
- else \
- mv re2c-parser.ht re2c-parser.h; \
- fi; \
- fi
- if test -f y.output; then \
- mv y.output re2c-parser.output; \
- fi
- sed '/^#/ s|y\.tab\.c|re2c-parser.c|' y.tab.c >re2c-parser.ct && mv re2c-parser.ct re2c-parser.c
- rm -f y.tab.c
-
-re2c-parser.h: re2c-parser.c
- @if test ! -f $@; then \
- rm -f re2c-parser.c; \
- $(MAKE) re2c-parser.c; \
- else :; fi
-
-BUILT_SOURCES += re2c-parser.c
-BUILT_SOURCES += re2c-parser.h
-
-CLEANFILES += re2c-parser.c
-CLEANFILES += re2c-parser.h
-
-EXTRA_DIST += re2c-parser.c
-EXTRA_DIST += re2c-parser.h
-
EXTRA_DIST += tools/re2c/CHANGELOG
EXTRA_DIST += tools/re2c/NO_WARRANTY
EXTRA_DIST += tools/re2c/README
diff --git a/tools/re2c/parser.c b/tools/re2c/parser.c
new file mode 100644
index 00000000..02d5c668
--- /dev/null
+++ b/tools/re2c/parser.c
@@ -0,0 +1,249 @@
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/parser.h"
+
+int yylex(void); /* defined below: hands back the buffered peek token if any, else Scanner_scan(in) */
+static RegExp *parse_expr(void); /* diff { '|' diff } */
+static RegExp *parse_diff(void); /* term { backslash term } */
+static RegExp *parse_term(void); /* factor { factor } */
+static RegExp *parse_factor(void); /* primary { CLOSE | CLOSESIZE } */
+static RegExp *parse_primary(void); /* ID | RANGE | STRING | '(' expr ')' */
+
+static unsigned int accept; /* passed to each new RuleOp, then incremented; reset to 0 by yyparse() */
+static RegExp *spec; /* alternation of every rule parsed so far; NULL until the first rule */
+static Scanner *in; /* scanner created in parse() and read by yylex() */
+
+static int curtok, peektok; /* current token; buffered lookahead token, or NONE when nothing is buffered */
+yystype yylval; /* semantic value of the token most recently returned by yylex() */
+static yystype peekval; /* semantic value saved alongside peektok */
+
+#define get_next_token() (curtok = yylex()) /* advance: fetch the next token into curtok and yield it */
+
+/*
+ * Read one token ahead of curtok into peektok/peekval.
+ *
+ * yylex() overwrites yylval, so the value of the current token is saved
+ * first and put back afterwards.  Only one buffered token is supported;
+ * a second request while one is pending is reported through
+ * Scanner_fatal().
+ */
+static void
+get_peek_token(void)
+{
+    yystype saved_val;
+
+    saved_val = yylval;		/* structure copy */
+    if (peektok != NONE)
+        Scanner_fatal(in, "more than one token of lookahead?");
+
+    peektok = yylex();
+    peekval = yylval;		/* structure copy */
+
+    yylval = saved_val;		/* current token's value is visible again */
+}
+
+/*
+ * Parse tokens until yylex() returns 0, handling two kinds of statement:
+ *
+ *     ID '=' expr ';'            named definition, stored in the symbol
+ *     expr [ '/' expr ] CODE     rule, appended to `spec`
+ *
+ * `accept` and `spec` are reset on entry; on return `spec` is the
+ * alternation of all rules seen (NULL when there were none).  Every
+ * syntax error is reported through Scanner_fatal().
+ *
+ * Every position where the grammar demands an expression is checked for a
+ * NULL result from parse_expr(), so that "ID = ;" and "expr / CODE" are
+ * rejected instead of storing or forwarding a NULL RegExp.
+ */
+static void
+yyparse(void)
+{
+    RegExp *re, *look;
+
+    accept = 0;
+    spec = NULL;
+    get_next_token();
+    while (curtok != 0) {
+        switch (curtok) {
+            case ID:
+                /* One token of lookahead separates a definition from a rule. */
+                get_peek_token();
+                if (peektok == '=') {
+                    /* ID = expr; */
+                    Symbol *sym = yylval.symbol;
+                    get_next_token(); /* past ID: curtok is now the buffered '=' */
+                    get_next_token(); /* past '=': curtok is the first token of expr */
+                    re = parse_expr();
+                    if (!re)
+                        Scanner_fatal(in, "expression syntax error");
+                    if (curtok != ';')
+                        Scanner_fatal(in, "missing `;' after regexp");
+                    get_next_token(); /* ; */
+                    if (sym->re)
+                        Scanner_fatal(in, "sym already defined");
+                    sym->re = re;
+                    break;
+                }
+                /*@fallthrough@*/
+            default:
+                /* rule: expr [/ expr] CODE */
+                re = parse_expr();
+                if (!re)
+                    Scanner_fatal(in, "expression syntax error");
+
+                if (curtok == '/') {
+                    get_next_token(); /* / */
+                    look = parse_expr();
+                    if (!look)
+                        Scanner_fatal(in, "expression syntax error");
+                } else
+                    look = RegExp_new_NullOp();
+
+                if (curtok != CODE)
+                    Scanner_fatal(in, "missing code after regexp");
+                /* yylval still holds the CODE token's value here. */
+                re = RegExp_new_RuleOp(re, look, yylval.token, accept++);
+                get_next_token(); /* CODE */
+                spec = spec ? mkAlt(spec, re) : re;
+        }
+    }
+}
+
+/*
+ * expr : diff { '|' diff }
+ *
+ * Returns the alternation of the parsed operands, or NULL when the current
+ * token cannot start an expression (callers treat NULL as "no expression
+ * here").  A '|' with a missing operand on either side is a fatal error
+ * rather than a NULL handed to mkAlt().
+ */
+static RegExp *
+parse_expr(void)
+{
+    RegExp *e, *f;
+
+    e = parse_diff();
+    while (curtok == '|') {
+        if (!e)
+            Scanner_fatal(in, "expression syntax error");
+        get_next_token(); /* | */
+        f = parse_diff();
+        if (!f)
+            Scanner_fatal(in, "expression syntax error");
+        e = mkAlt(e, f);
+    }
+    return e;
+}
+
+/*
+ * diff : term { '\' term }
+ *
+ * Returns the parsed term, with each following "\ term" folded in through
+ * mkDiff(), or NULL when the current token cannot start a term.  A '\'
+ * with a missing operand on either side is a fatal error rather than a
+ * NULL handed to mkDiff(); a NULL result from mkDiff() itself is reported
+ * as "can only difference char sets".
+ */
+static RegExp *
+parse_diff(void)
+{
+    RegExp *e, *f;
+
+    e = parse_term();
+    while (curtok == '\\') {
+        if (!e)
+            Scanner_fatal(in, "expression syntax error");
+        get_next_token(); /* \ */
+        f = parse_term();
+        if (!f)
+            Scanner_fatal(in, "expression syntax error");
+        e = mkDiff(e, f);
+        if (!e)
+            Scanner_fatal(in, "can only difference char sets");
+    }
+    return e;
+}
+
+/*
+ * term : factor { factor }
+ *
+ * Concatenates consecutive factors.  The run ends at the first
+ * parse_factor() call that returns NULL; the result is whatever has been
+ * accumulated so far (NULL when not even one factor was found).
+ */
+static RegExp *
+parse_term(void)
+{
+    RegExp *cat = parse_factor();
+    RegExp *next;
+
+    for (;;) {
+        next = parse_factor();
+        if (!next)
+            break;
+        cat = RegExp_new_CatOp(cat, next);
+    }
+    return cat;
+}
+
+/*
+ * factor : primary { CLOSE | CLOSESIZE }
+ *
+ * Applies any closure operators that follow a primary.  A run of adjacent
+ * CLOSE tokens collapses to a single operator: the shared one when they
+ * are all equal, '*' otherwise.
+ *
+ * Returns NULL when the current token cannot start a primary.  A closure
+ * operator with no primary in front of it is a fatal error; without that
+ * check a NULL operand would be wrapped in a closure node.
+ */
+static RegExp *
+parse_factor(void)
+{
+    RegExp *e;
+    char ch;
+
+    e = parse_primary();
+    if (!e) {
+        if (curtok == CLOSE || curtok == CLOSESIZE)
+            Scanner_fatal(in, "closure operator has no operand");
+        return NULL;
+    }
+
+    while (curtok == CLOSE || curtok == CLOSESIZE) {
+        if (curtok == CLOSE) {
+            ch = yylval.op;
+            /* Merge adjacent operators; differing ones degrade to '*'. */
+            while (get_next_token() == CLOSE) {
+                if (ch != yylval.op)
+                    ch = '*';
+            }
+            switch (ch) {
+                case '*':
+                    e = mkAlt(RegExp_new_CloseOp(e), RegExp_new_NullOp());
+                    break;
+                case '+':
+                    e = RegExp_new_CloseOp(e);
+                    break;
+                case '?':
+                    e = mkAlt(e, RegExp_new_NullOp());
+                    break;
+                default:
+                    /* any other operator character leaves e unchanged */
+                    break;
+            }
+        } else {
+            /* CLOSESIZE: repetition bounds travel in yylval.extop */
+            e = RegExp_new_CloseVOp(e, yylval.extop.minsize,
+                                    yylval.extop.maxsize);
+            get_next_token(); /* CLOSESIZE */
+        }
+    }
+    return e;
+}
+
+/*
+ * primary : ID | RANGE | STRING | '(' expr ')'
+ *
+ * Returns the RegExp for the primary at the current token and advances
+ * past it, or returns NULL without consuming anything when the current
+ * token cannot start a primary.
+ *
+ * An ID must already carry a definition.  A parenthesised group must
+ * contain an expression: "()" is a fatal error, because returning NULL
+ * after consuming the parentheses would be read by callers as "no primary
+ * here" and the empty group would be silently dropped.
+ */
+static RegExp *
+parse_primary(void)
+{
+    RegExp *e;
+
+    switch (curtok) {
+        case ID:
+            if (!yylval.symbol->re)
+                Scanner_fatal(in, "can't find symbol");
+            e = yylval.symbol->re;
+            get_next_token(); /* ID */
+            break;
+        case RANGE:
+        case STRING:
+            e = yylval.regexp;
+            get_next_token(); /* RANGE or STRING */
+            break;
+        case '(':
+            get_next_token(); /* ( */
+            e = parse_expr();
+            if (!e)
+                Scanner_fatal(in, "expression syntax error");
+            if (curtok != ')')
+                Scanner_fatal(in, "missing closing parenthesis");
+            get_next_token(); /* ) */
+            break;
+        default:
+            return NULL;
+    }
+    return e;
+}
+
+/*
+ * Token source for the parser.  When get_peek_token() has buffered a
+ * token, that token and its value are handed out first and the buffer is
+ * marked empty; otherwise the next token comes straight from the scanner.
+ */
+int
+yylex(void)
+{
+    int buffered;
+
+    if (peektok == NONE)
+        return Scanner_scan(in);
+
+    buffered = peektok;
+    yylval = peekval;
+    peektok = NONE;
+    return buffered;
+}
+
+/*
+ * Write a `#line LINE "FILE"` directive to `o` and count it in `oline`.
+ * Does nothing when iFlag is set.  FILE is fileName, or "<stdin>" when
+ * fileName is NULL, with every backslash doubled so the name stays a
+ * valid C string literal.
+ *
+ * The name is escaped one character at a time.  Splitting it with
+ * strtok() dropped leading and trailing backslashes, collapsed runs of
+ * them, and passed NULL to "%s" when the name was empty or consisted only
+ * of backslashes.
+ */
+void line_source(FILE *o, unsigned int line)
+{
+    const char *p;
+
+    if (iFlag)
+        return;
+    fprintf(o, "#line %u \"", line);
+    for (p = (fileName != NULL) ? fileName : "<stdin>"; *p != '\0'; p++) {
+        if (*p == '\\')
+            fputs("\\\\", o);
+        else
+            fputc(*p, o);
+    }
+    fputs("\"\n", o); oline++;
+}
+
+/*
+ * Entry point: read re2c input from `i` and write the generated file to
+ * `o`.  A "Generated by" banner and an initial #line directive are
+ * written first.  Then, for as long as Scanner_echo() returns non-zero,
+ * one block is parsed, code is generated for it when it contained at
+ * least one rule, and a fresh #line directive is written.
+ */
+void parse(FILE *i, FILE *o){
+    time_t now;
+
+    time(&now);
+
+    /* start with an empty lookahead buffer */
+    peektok = NONE;
+
+    fputs("/* Generated by re2c 0.9.1-C on ", o);
+    fprintf(o, "%-24s", ctime(&now));
+    fputs(" */\n", o);
+    oline += 2;	/* the banner is counted as two output lines */
+
+    in = Scanner_new(i);
+
+    line_source(o, Scanner_line(in));
+
+    for (;;) {
+        if (!Scanner_echo(in, o))
+            break;
+        yyparse();
+        if (spec != NULL)
+            genCode(o, spec);
+        line_source(o, Scanner_line(in));
+    }
+}
diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h
new file mode 100644
index 00000000..c433a99d
--- /dev/null
+++ b/tools/re2c/parser.h
@@ -0,0 +1,33 @@
+#ifndef RE2C_PARSER_H
+#define RE2C_PARSER_H
+
+/* Tokens; parser.c also compares tokens against plain character constants such as '=' and ';' */
+enum yytokentype {
+ CLOSESIZE = 258, /* value in yylval.extop (minsize/maxsize) */
+ CLOSE = 259, /* value in yylval.op; parser.c handles '*', '+' and '?' */
+ ID = 260, /* value in yylval.symbol */
+ CODE = 261, /* value in yylval.token */
+ RANGE = 262, /* value in yylval.regexp */
+ STRING = 263, /* value in yylval.regexp */
+ NONE = 264 /* used by parser.c to mark its lookahead buffer as empty */
+};
+
+#define CLOSESIZE 258 /* macro duplicates of the enumerators above, same values */
+#define CLOSE 259
+#define ID 260
+#define CODE 261
+#define RANGE 262
+#define STRING 263
+#define NONE 264
+
+typedef union { /* semantic value of a token; the member types are not declared here, and both includers pull in parse.h first */
+ Symbol *symbol;
+ RegExp *regexp;
+ Token *token;
+ char op;
+ ExtOp extop;
+} yystype;
+
+extern yystype yylval; /* defined in parser.c */
+
+#endif
diff --git a/tools/re2c/re2c-parser.y b/tools/re2c/re2c-parser.y
deleted file mode 100644
index 998cdb05..00000000
--- a/tools/re2c/re2c-parser.y
+++ /dev/null
@@ -1,181 +0,0 @@
-%{
-#include <time.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include "tools/re2c/globals.h"
-#include "tools/re2c/parse.h"
-
-/* Work around bug in Bison 2.1 */
-#define YYPARSE_PARAM unused
-
-int yylex(void);
-void yyerror(const char*);
-
-static unsigned int accept;
-static RegExp *spec;
-static Scanner *in;
-
-%}
-
-%start spec
-
-%union {
- Symbol *symbol;
- RegExp *regexp;
- Token *token;
- char op;
- ExtOp extop;
-}
-
-%token CLOSESIZE CLOSE ID CODE RANGE STRING
-
-%type <op> CLOSE
-%type <op> close
-%type <extop> CLOSESIZE
-%type <symbol> ID
-%type <token> CODE
-%type <regexp> RANGE STRING
-%type <regexp> rule look expr diff term factor primary
-
-%%
-
-spec :
- { accept = 0;
- spec = NULL; }
- | spec rule
- { spec = spec? mkAlt(spec, $2) : $2; }
- | spec decl
- ;
-
-decl : ID '=' expr ';'
- { if($1->re)
- Scanner_fatal(in, "sym already defined");
- $1->re = $3; }
- ;
-
-rule : expr look CODE
- { $$ = RegExp_new_RuleOp($1, $2, $3, accept++); }
- ;
-
-look :
- { $$ = RegExp_new_NullOp(); }
- | '/' expr
- { $$ = $2; }
- ;
-
-expr : diff
- { $$ = $1; }
- | expr '|' diff
- { $$ = mkAlt($1, $3); }
- ;
-
-diff : term
- { $$ = $1; }
- | diff '\\' term
- { $$ = mkDiff($1, $3);
- if(!$$)
- Scanner_fatal(in, "can only difference char sets");
- }
- ;
-
-term : factor
- { $$ = $1; }
- | term factor
- { $$ = RegExp_new_CatOp($1, $2); }
- ;
-
-factor : primary
- { $$ = $1; }
- | primary close
- {
- switch($2){
- case '*':
- $$ = mkAlt(RegExp_new_CloseOp($1), RegExp_new_NullOp());
- break;
- case '+':
- $$ = RegExp_new_CloseOp($1);
- break;
- case '?':
- $$ = mkAlt($1, RegExp_new_NullOp());
- break;
- }
- }
- | primary CLOSESIZE
- {
- $$ = RegExp_new_CloseVOp($1, $2.minsize, $2.maxsize);
- }
- ;
-
-close : CLOSE
- { $$ = $1; }
- | close CLOSE
- { $$ = ($1 == $2) ? $1 : '*'; }
- ;
-
-primary : ID
- { if(!$1->re)
- Scanner_fatal(in, "can't find symbol");
- $$ = $1->re; }
- | RANGE
- { $$ = $1; }
- | STRING
- { $$ = $1; }
- | '(' expr ')'
- { $$ = $2; }
- ;
-
-%%
-
-void yyerror(const char* s){
- Scanner_fatal(in, s);
-}
-
-int yylex(){
- return Scanner_scan(in);
-}
-
-void line_source(FILE *o, unsigned int line)
-{
- char * fnamebuf;
- char * token;
-
- if (iFlag)
- return;
- fprintf(o, "#line %u \"", line);
- if( fileName != NULL ) {
- fnamebuf = mystrdup( fileName );
- } else {
- fnamebuf = mystrdup( "<stdin>" );
- }
- token = strtok( fnamebuf, "\\" );
- for(;;) {
- fprintf(o, "%s", token);
- token = strtok( NULL, "\\" );
- if( token == NULL ) break;
- fputs("\\\\", o);
- }
- fputs("\"\n", o); oline++;
- free( fnamebuf );
-}
-
-void parse(FILE *i, FILE *o){
- time_t now;
-
- time(&now);
-
- fputs("/* Generated by re2c 0.9.1-C on ", o);
- fprintf(o, "%-24s", ctime(&now));
- fputs(" */\n", o); oline+=2;
-
- in = Scanner_new(i);
-
- line_source(o, Scanner_line(in));
-
- while(Scanner_echo(in, o)){
- yyparse(NULL);
- if(spec)
- genCode(o, spec);
- line_source(o, Scanner_line(in));
- }
-}
diff --git a/tools/re2c/scanner.c b/tools/re2c/scanner.c
index fd0ca93e..4640ee4a 100644
--- a/tools/re2c/scanner.c
+++ b/tools/re2c/scanner.c
@@ -6,7 +6,7 @@
#include "tools/re2c/scanner.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/globals.h"
-#include "re2c-parser.h"
+#include "tools/re2c/parser.h"
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))
diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re
index 81dcf48b..10af0883 100644
--- a/tools/re2c/scanner.re
+++ b/tools/re2c/scanner.re
@@ -3,7 +3,7 @@
#include "tools/re2c/scanner.h"
#include "tools/re2c/parse.h"
#include "tools/re2c/globals.h"
-#include "re2c-parser.h"
+#include "tools/re2c/parser.h"
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))