summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorJoel E. Denny <joeldenny@joeldenny.org>2010-12-11 11:13:33 -0500
committerJoel E. Denny <joeldenny@joeldenny.org>2010-12-11 14:43:43 -0500
commitbf35c71c5827d735c125ee25b048eabf40960a55 (patch)
tree811d71a2478ecea6933d1ce34096b118b8de6f1c /tests
parent10bae98af27edbb84a06d82a20a63ad7a0cca688 (diff)
downloadbison-bf35c71c5827d735c125ee25b048eabf40960a55.tar.gz
parse.lac: implement as %define variable.
LAC = lookahead correction. See discussion at <http://lists.gnu.org/archive/html/bison-patches/2009-09/msg00034.html>. However, one point there must be corrected: because of %nonassoc, LAC is *not* always redundant for lr.type=canonical-lr. * data/yacc.c: Accept values of "none" (default) or "full" for parse.lac. Accept %define parse.lac.es-capacity to specify capacity of LAC's temporary exploratory stack. It defaults to 20 and, for now, will not grow dynamically. (b4_lac_flag, b4_lac_if): New m4 macros. Evaluate as true for parse.lac!=none. (YYBACKUP): Invoke YY_LAC_DISCARD. (YY_LAC_ESTABLISH, YY_LAC_DISCARD): New cpp macros that invoke yy_lac and track when it needs to be invoked. (yy_lac): New function that, given the current stack, determines whether a token can eventually be shifted. Return status mimics yyparse return status. (yysyntax_error): Change yystate argument to yyssp so stack top can be passed to yy_lac. If LAC is requested, build expected token list by invoking yy_lac for every token instead of just checking the current state for lookaheads. Return 2 if yy_lac exhausts memory. (yyparse, yypush_parse): Use local variable yy_lac_established and cpp macros YY_LAC_ESTABLISH and YY_LAC_DISCARD to implement LAC. Update yysyntax_error invocation. Add yyexhaustedlab code if LAC is requested. * tests/conflicts.at (%nonassoc and eof): Extend to check the effect of each of -Dlr.type=canonical-lr and -Dparse.lac=full. (parse.error=verbose and consistent errors): Likewise. (LAC: %nonassoc requires splitting canonical LR states): New test group demonstrating how LAC can fix canonical LR. * tests/input.at (LAC: Errors for %define): New test group. * tests/regression.at (LAC: Exploratory stack): New test group. (LAC: Memory exhaustion): New test group.
Diffstat (limited to 'tests')
-rw-r--r--tests/conflicts.at185
-rw-r--r--tests/input.at19
-rw-r--r--tests/regression.at183
3 files changed, 362 insertions, 25 deletions
diff --git a/tests/conflicts.at b/tests/conflicts.at
index 655a666c..16e1956f 100644
--- a/tests/conflicts.at
+++ b/tests/conflicts.at
@@ -94,46 +94,52 @@ main (int argc, const char *argv[])
}
]])
-# Specify the output files to avoid problems on different file systems.
-AT_BISON_CHECK([-o input.c input.y])
+m4_pushdef([AT_NONASSOC_AND_EOF_CHECK],
+[AT_BISON_CHECK([$1[ -o input.c input.y]])
AT_COMPILE([input])
+m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting $end]])])
+
AT_PARSER_CHECK([./input '0<0'])
AT_PARSER_CHECK([./input '0<0<0'], [1], [],
- [syntax error, unexpected '<'
+ [syntax error, unexpected '<'AT_EXPECTING
])
AT_PARSER_CHECK([./input '0>0'])
AT_PARSER_CHECK([./input '0>0>0'], [1], [],
- [syntax error, unexpected '>'
+ [syntax error, unexpected '>'AT_EXPECTING
])
AT_PARSER_CHECK([./input '0<0>0'], [1], [],
- [syntax error, unexpected '>'
+ [syntax error, unexpected '>'AT_EXPECTING
])
-# We must disable default reductions in inconsistent states in order to
-# have an explicit list of all expected tokens. (However, unless we use
-# canonical LR, lookahead sets are merged for different left contexts,
-# so it is still possible to have extra incorrect tokens in the expected
-# list. That just doesn't happen to be a problem for this test case.)
-
-AT_BISON_CHECK([-Dlr.default-reductions=consistent -o input.c input.y])
-AT_COMPILE([input])
-
-AT_PARSER_CHECK([./input '0<0'])
-AT_PARSER_CHECK([./input '0<0<0'], [1], [],
- [syntax error, unexpected '<', expecting $end
-])
+m4_popdef([AT_EXPECTING])])
-AT_PARSER_CHECK([./input '0>0'])
-AT_PARSER_CHECK([./input '0>0>0'], [1], [],
- [syntax error, unexpected '>', expecting $end
-])
+# Expected token list is missing.
+AT_NONASSOC_AND_EOF_CHECK([], [[incorrect]])
-AT_PARSER_CHECK([./input '0<0>0'], [1], [],
- [syntax error, unexpected '>', expecting $end
-])
+# We must disable default reductions in inconsistent states in order to
+# have an explicit list of all expected tokens.
+AT_NONASSOC_AND_EOF_CHECK([[-Dlr.default-reductions=consistent]],
+ [[correct]])
+
+# lr.default-reductions=consistent happens to work for this test case.
+# However, for other grammars, lookahead sets can be merged for
+# different left contexts, so it is still possible to have an incorrect
+# expected list. Canonical LR is almost a general solution (that is, it
+# can fail only when %nonassoc is used), so make sure it gives the same
+# result as above.
+AT_NONASSOC_AND_EOF_CHECK([[-Dlr.type=canonical-lr]], [[correct]])
+
+# parse.lac=full is a completely general solution that does not require
+# any of the above sacrifices. Of course, it does not extend the
+# language-recognition power of LALR to (IE)LR, but it does ensure that
+# the reported list of expected tokens matches what the given parser
+# would have accepted in place of the unexpected token.
+AT_NONASSOC_AND_EOF_CHECK([[-Dparse.lac=full]], [[correct]])
+
+m4_popdef([AT_NONASSOC_AND_EOF_CHECK])
AT_CLEANUP
@@ -342,6 +348,18 @@ AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
[AT_PREVIOUS_STATE_INPUT],
[[$end]], [[ab]])
+# Only LAC gets it right.
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr
+ %define parse.lac full]],
+ [AT_PREVIOUS_STATE_GRAMMAR],
+ [AT_PREVIOUS_STATE_INPUT],
+ [[$end]], [[b]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+ %define parse.lac full]],
+ [AT_PREVIOUS_STATE_GRAMMAR],
+ [AT_PREVIOUS_STATE_INPUT],
+ [[$end]], [[b]])
+
m4_popdef([AT_PREVIOUS_STATE_GRAMMAR])
m4_popdef([AT_PREVIOUS_STATE_INPUT])
@@ -417,6 +435,16 @@ AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
[AT_USER_ACTION_INPUT],
[[$end]], [[a]])
+AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full]],
+ [AT_USER_ACTION_GRAMMAR],
+ [AT_USER_ACTION_INPUT],
+ [['b']], [[none]])
+AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full
+ %define lr.default-reductions accepting]],
+ [AT_USER_ACTION_GRAMMAR],
+ [AT_USER_ACTION_INPUT],
+ [[$end]], [[none]])
+
m4_popdef([AT_USER_ACTION_GRAMMAR])
m4_popdef([AT_USER_ACTION_INPUT])
@@ -426,6 +454,113 @@ AT_CLEANUP
+## ------------------------------------------------------- ##
+## LAC: %nonassoc requires splitting canonical LR states. ##
+## ------------------------------------------------------- ##
+
+# This test case demonstrates that, when %nonassoc is used, canonical
+# LR(1) parser table construction followed by conflict resolution
+# without further state splitting is not always sufficient to produce a
+# parser that can detect all syntax errors as soon as possible on one
+# token of lookahead. However, LAC solves the problem completely even
+# with minimal LR parser tables.
+
+AT_SETUP([[LAC: %nonassoc requires splitting canonical LR states]])
+
+AT_DATA_GRAMMAR([[input.y]],
+[[%code {
+ #include <stdio.h>
+ void yyerror (char const *);
+ int yylex (void);
+}
+
+%error-verbose
+%nonassoc 'a'
+
+%%
+
+start:
+ 'a' problem 'a' // First context.
+| 'b' problem 'b' // Second context.
+| 'c' reduce-nonassoc // Just makes reduce-nonassoc useful.
+;
+
+problem:
+ look reduce-nonassoc
+| look 'a'
+| look 'b'
+;
+
+// For the state reached after shifting the 'a' in these productions,
+// lookahead sets are the same in both the first and second contexts.
+// Thus, canonical LR reuses the same state for both contexts. However,
+// the lookahead 'a' for the reduction "look: 'a'" later becomes an
+// error action only in the first context. In order to immediately
+// detect the syntax error on 'a' here for only the first context, this
+// canonical LR state would have to be split into two states, and the
+// 'a' lookahead would have to be removed from only one of the states.
+look:
+ 'a' // Reduction lookahead set is always ['a', 'b'].
+| 'a' 'b'
+| 'a' 'c' // 'c' is forgotten as an expected token.
+;
+
+reduce-nonassoc: %prec 'a';
+
+%%
+
+void
+yyerror (char const *msg)
+{
+ fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (void)
+{
+  static char const *input = "aaa"; /* static: cursor persists across calls */
+  return *input++;
+}
+
+int
+main (void)
+{
+ return yyparse ();
+}
+]])
+
+# Show canonical LR's failure.
+AT_BISON_CHECK([[-Dlr.type=canonical-lr -o input.c input.y]],
+ [[0]], [[]],
+[[input.y: conflicts: 2 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [[]],
+[[syntax error, unexpected 'a', expecting 'b'
+]])
+
+# It's corrected by LAC.
+AT_BISON_CHECK([[-Dlr.type=canonical-lr -Dparse.lac=full \
+ -o input.c input.y]], [[0]], [[]],
+[[input.y: conflicts: 2 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [[]],
+[[syntax error, unexpected 'a', expecting 'b' or 'c'
+]])
+
+# IELR is sufficient when LAC is used.
+AT_BISON_CHECK([[-Dlr.type=ielr -Dparse.lac=full -o input.c input.y]],
+ [[0]], [[]],
+[[input.y: conflicts: 2 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [[]],
+[[syntax error, unexpected 'a', expecting 'b' or 'c'
+]])
+
+AT_CLEANUP
+
## ------------------------- ##
## Unresolved SR Conflicts. ##
## ------------------------- ##
diff --git a/tests/input.at b/tests/input.at
index 241c4d06..25466854 100644
--- a/tests/input.at
+++ b/tests/input.at
@@ -1289,3 +1289,22 @@ input.y:5.19: invalid character after \-escape: \001
]])
AT_CLEANUP
+
+## ------------------------- ##
+## LAC: Errors for %define. ##
+## ------------------------- ##
+
+AT_SETUP([[LAC: Errors for %define]])
+
+AT_DATA([[input.y]],
+[[%%
+start: ;
+]])
+
+# parse.lac.* options are useless if LAC isn't actually activated.
+AT_BISON_CHECK([[-Dparse.lac.es-capacity=1 input.y]],
+ [[1]], [],
+[[<command line>:2: %define variable `parse.lac.es-capacity' is not used
+]])
+
+AT_CLEANUP
diff --git a/tests/regression.at b/tests/regression.at
index b3fdc29f..65d4ac23 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1469,3 +1469,186 @@ memory exhausted
]])
AT_CLEANUP
+
+
+
+## ------------------------ ##
+## LAC: Exploratory stack. ##
+## ------------------------ ##
+
+AT_SETUP([[LAC: Exploratory stack]])
+
+m4_pushdef([AT_LAC_CHECK], [
+
+AT_BISON_OPTION_PUSHDEFS([$1])
+
+AT_DATA_GRAMMAR([input.y],
+[[%code {
+ #include <stdio.h>
+ void yyerror (char const *);
+ int yylex (]AT_PURE_IF([[YYSTYPE *]], [[void]])[);
+}
+
+]$1[
+%define parse.error verbose
+%token 'c'
+
+%%
+
+// default reductions in inconsistent states
+// v v v v v v v v v v v v v v
+S: A B A A B A A A A B A A A A A A A B C C A A A A A A A A A A A A B ;
+
+A: 'a' | /*empty*/ { printf ("inconsistent default reduction\n"); } ;
+B: 'b' ;
+C: /*empty*/ { printf ("consistent default reduction\n"); } ;
+
+%%
+
+void
+yyerror (char const *msg)
+{
+ fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (]AT_PURE_IF([[YYSTYPE *v]], [[void]])[)
+{
+ static char const *input = "bbbbc";]AT_PURE_IF([[
+ *v = 0;]])[
+ return *input++;
+}
+
+int
+main (void)
+{
+ yydebug = 1;
+ return yyparse ();
+}
+]])
+
+# Give exactly the right amount of memory to be sure there's no
+# off-by-one error, for example.
+AT_BISON_CHECK([[-Dparse.lac=full -Dparse.lac.es-capacity=12 \
+ -t -o input.c input.y]], [[0]], [],
+[[input.y: conflicts: 21 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input > stdout.txt 2> stderr.txt]], [[1]])
+
+# Make sure syntax error doesn't forget that 'a' is expected. It would
+# be forgotten without lookahead correction.
+AT_CHECK([[grep 'syntax error,' stderr.txt]], [[0]],
+[[syntax error, unexpected 'c', expecting 'a' or 'b'
+]])
+
+# Check number of default reductions in inconsistent states to be sure
+# syntax error is detected before unnecessary reductions are performed.
+AT_CHECK([[perl -0777 -ne 'print s/inconsistent default reduction//g;' \
+ < stdout.txt || exit 77]], [[0]], [[14]])
+
+# Check number of default reductions in consistent states to be sure
+# they are performed before the syntax error is detected.
+AT_CHECK([[perl -0777 -ne 'print s/\bconsistent default reduction//g;' \
+ < stdout.txt || exit 77]], [[0]], [[2]])
+
+AT_BISON_OPTION_POPDEFS
+])
+
+AT_LAC_CHECK([[%define api.push-pull pull]])
+AT_LAC_CHECK([[%define api.push-pull pull %define api.pure]])
+AT_LAC_CHECK([[%define api.push-pull both]])
+AT_LAC_CHECK([[%define api.push-pull both %define api.pure]])
+
+m4_popdef([AT_LAC_CHECK])
+
+AT_CLEANUP
+
+
+
+## ------------------------ ##
+## LAC: Memory exhaustion. ##
+## ------------------------ ##
+
+AT_SETUP([[LAC: Memory exhaustion]])
+
+m4_pushdef([AT_LAC_CHECK], [
+
+AT_DATA_GRAMMAR([input.y],
+[[%code {
+ #include <stdio.h>
+ void yyerror (char const *);
+ int yylex (void);
+}
+
+%error-verbose
+
+%%
+
+S: A A A A A A A A A ;
+A: /*empty*/ | 'a' ;
+
+%%
+
+void
+yyerror (char const *msg)
+{
+ fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (void)
+{
+ static char const *input = "]$1[";
+ return *input++;
+}
+
+int
+main (void)
+{
+ yydebug = 1;
+ return yyparse ();
+}
+]])
+
+AT_BISON_CHECK([[-Dparse.lac=full -Dparse.lac.es-capacity=8 \
+ -t -o input.c input.y]], [[0]], [],
+[[input.y: conflicts: 8 shift/reduce
+]])
+AT_COMPILE([[input]])
+
+])
+
+# Check for memory exhaustion during parsing.
+AT_LAC_CHECK([[]])
+AT_PARSER_CHECK([[./input]], [[2]], [[]],
+[[Starting parse
+Entering state 0
+Reading a token: Now at end of input.
+LAC: initial context established for $end
+LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max stack size exceeded)
+memory exhausted
+Cleanup: discarding lookahead token $end ()
+Stack now 0
+]])
+
+# Induce an immediate syntax error with an undefined token, and check
+# for memory exhaustion while building syntax error message.
+AT_LAC_CHECK([[z]], [[0]])
+AT_PARSER_CHECK([[./input]], [[2]], [[]],
+[[Starting parse
+Entering state 0
+Reading a token: Next token is token $undefined ()
+LAC: initial context established for $undefined
+LAC: checking lookahead $undefined: Always Err
+Constructing syntax error message
+LAC: checking lookahead $end: R2 G3 R2 G5 R2 G6 R2 G7 R2 G8 R2 G9 R2 G10 R2 G11 R2 (max stack size exceeded)
+syntax error
+memory exhausted
+Cleanup: discarding lookahead token $undefined ()
+Stack now 0
+]])
+
+m4_popdef([AT_LAC_CHECK])
+
+AT_CLEANUP