diff options
author | Akim Demaille <akim.demaille@gmail.com> | 2021-08-07 09:30:24 +0200 |
---|---|---|
committer | Akim Demaille <akim.demaille@gmail.com> | 2021-08-07 12:53:19 +0200 |
commit | 80db1029e67e5d42fa6329489558b114fdfea880 (patch) | |
tree | 72f350019318187faa1d941c04eb79145cf996c0 | |
parent | 6118406c3eac88a06a414edf4a50b175fa339916 (diff) | |
download | bison-80db1029e67e5d42fa6329489558b114fdfea880.tar.gz |
m4: catch suspicions of unevaluated macros
Check in m4's output if there are sequences such as m4_foo or b4_foo,
which probably result from incorrect m4 processing.
It actually already is useful:
- it caught a b4_lac_if leaking from glr.c, where LAC is not
supported, hence b4_lac_if is not defined.
- it also caught references to location.hh in position.hh when
location.hh does not exist.
- while making "Code injection" robust to these new warnings (it is
its very purpose to let b4_canary pass unevaluated), I saw that it
did not check lalr1.d, and when adding lalr1.d, it revealed it did
underquote occurrences of token value types.
* src/scan-skel.l (macro): New abbreviation.
Use it.
* data/skeletons/glr.c: Don't use b4_lac_if, we don't have it.
* data/skeletons/location.cc: Don't generate position.hh when we don't
generate location.hh.
* data/skeletons/d.m4 (b4_basic_symbol_constructor_define): Fix
underquotation.
* data/skeletons/bison.m4 (b4_canary): New.
* tests/input.at (Code injection): Use it, and check lalr1.d too.
-rw-r--r-- | TODO | 4 | ||||
-rw-r--r-- | data/skeletons/bison.m4 | 7 | ||||
-rw-r--r-- | data/skeletons/d.m4 | 4 | ||||
-rw-r--r-- | data/skeletons/glr.c | 9 | ||||
-rw-r--r-- | data/skeletons/location.cc | 17 | ||||
-rw-r--r-- | src/scan-skel.l | 19 | ||||
-rw-r--r-- | tests/input.at | 37 |
7 files changed, 57 insertions, 40 deletions
@@ -108,10 +108,6 @@ enough. *** calc.at Stop hard-coding "Calc". Adjust local.at (look for FIXME). -** A dev warning for b4_ -Maybe we should check for m4_ and b4_ leaking out of the m4 processing, as -Autoconf does. It would have caught over-quotation issues. - ** doc I feel it's ugly to use the GNU style to declare functions in the doc. It generates tons of white space in the page, and may contribute to bad page diff --git a/data/skeletons/bison.m4 b/data/skeletons/bison.m4 index 6d92e4f9..488ad610 100644 --- a/data/skeletons/bison.m4 +++ b/data/skeletons/bison.m4 @@ -261,6 +261,13 @@ m4_define([b4_fatal_at], [b4_error([[fatal]], $@)dnl m4_exit(1)]) +# b4_canary(MSG) +# -------------- +# Issue a warning on stderr and in the output. Used in the test suite +# to catch spurious m4 evaluations. +m4_define([b4_canary], +[m4_errprintn([dead canary: $1])DEAD CANARY($1)]) + ## ------------ ## ## Data Types. ## diff --git a/data/skeletons/d.m4 b/data/skeletons/d.m4 index b69df1a7..c0632e47 100644 --- a/data/skeletons/d.m4 +++ b/data/skeletons/d.m4 @@ -586,7 +586,7 @@ alias ACCEPT = ]b4_parser_class[.YYACCEPT;]])[]])[ # Create Symbol struct constructors for all the visible types. 
m4_define([b4_basic_symbol_constructor_define], [b4_token_visible_if([$1], -[ this(TokenKind token]b4_symbol_if([$1], [has_type], +[[ this(TokenKind token]b4_symbol_if([$1], [has_type], [[, ]b4_union_if([], [[typeof(YYSemanticType.]])b4_symbol([$1], [type])dnl []b4_union_if([], [[) ]])[ val]])[]b4_locations_if([[, Location loc]])[) { @@ -601,7 +601,7 @@ m4_define([b4_basic_symbol_constructor_define], value_.]b4_symbol([$1], [type])[ = val;]])])[]b4_locations_if([ location_ = loc;])[ } -])]) +]])]) # b4_symbol_type_define diff --git a/data/skeletons/glr.c b/data/skeletons/glr.c index 1f956955..70ab8b1d 100644 --- a/data/skeletons/glr.c +++ b/data/skeletons/glr.c @@ -2204,12 +2204,7 @@ yy_syntax_error_arguments (const yyGLRStack* yystackp, - Don't assume there isn't a lookahead just because this state is a consistent state with a default action. There might have been a previous inconsistent state, consistent state with a non-default - action, or user semantic action that manipulated yychar.]b4_lac_if([[ - In the first two cases, it might appear that the current syntax - error should have been detected in the previous state when yy_lac - was invoked. However, at that time, there might have been a - different syntax error that discarded a different initial context - during error recovery, leaving behind the current lookahead.]], [[ + action, or user semantic action that manipulated yychar. - Of course, the expected token list depends on states to have correct lookahead information, and it depends on the parser not to perform extra reductions after fetching a lookahead from the @@ -2217,7 +2212,7 @@ yy_syntax_error_arguments (const yyGLRStack* yystackp, (from LALR or IELR) and default reductions corrupt the expected token list. However, the list is correct for canonical LR with one exception: it will still contain any token that will not be - accepted due to an error action in a later state.]])[ + accepted due to an error action in a later state. 
*/ if (yytoken != ]b4_symbol(empty, kind)[) { diff --git a/data/skeletons/location.cc b/data/skeletons/location.cc index b32c1377..3870b2bc 100644 --- a/data/skeletons/location.cc +++ b/data/skeletons/location.cc @@ -19,13 +19,6 @@ m4_pushdef([b4_copyright_years], [2002-2015, 2018-2021]) -# b4_position_file -# ---------------- -# Name of the file containing the position class, if we want this file. -b4_header_if([b4_required_version_if([30200], [], - [m4_define([b4_position_file], [position.hh])])])]) - - # b4_location_file # ---------------- # Name of the file containing the position/location class, @@ -54,6 +47,16 @@ m4_ifdef([b4_location_file], ]) +# b4_position_file +# ---------------- +# Name of the file containing the position class, if we want this file. +b4_header_if( + [b4_required_version_if( + [30200], [], + [m4_ifdef([b4_location_file], + [m4_define([b4_position_file], [position.hh])])])]) + + # b4_location_define # ------------------ diff --git a/src/scan-skel.l b/src/scan-skel.l index 20049cfb..60232a2e 100644 --- a/src/scan-skel.l +++ b/src/scan-skel.l @@ -53,6 +53,9 @@ static void fail_for_invalid_at (char const *at); static void output_mapped_file (char const *name); %} +/* Identifiers of our M4 macros. */ +macro [bm]4_[a-zA-Z_0-9]* + %x SC_AT_DIRECTIVE_ARGS %x SC_AT_DIRECTIVE_SKIP_WS @@ -87,7 +90,21 @@ static void output_mapped_file (char const *name); /* This pattern must not match more than the previous @ patterns. */ @[^@{}''(\n]* fail_for_invalid_at (yytext); \n out_lineno++; ECHO; -[^@\n]+ ECHO; +[^bm@\n]+ ECHO; + + /* If there are still identifiers that look like macros, such as + b4_synbol, this probably an error, say a typo in M4, or + overquotation. */ +{macro} { + location loc = empty_loc; + loc.start.file = map_file_name (out_name); + loc.start.line = out_lineno; + loc.end = loc.start; + complain (&loc, Wother, + "suspicious sequence in the output: %s", yytext); + ECHO; + } +. 
ECHO; <INITIAL><<EOF>> { if (out_name) diff --git a/tests/input.at b/tests/input.at index 5f298bc9..f41c3657 100644 --- a/tests/input.at +++ b/tests/input.at @@ -2889,26 +2889,26 @@ m4_pattern_allow([^m4_errprintn$]) # Try to have MACRO be run by bison. m4_pushdef([AT_TEST], [AT_DATA([[input.y]], -[[%type <$1(DEAD %type)> exp -%token <$1(DEAD %token)> a +[[%type <$1(%type)> exp +%token <$1(%token)> a %token b %initial-action { $$; - $<$1(DEAD %initial-action)>$ + $<$1(%initial-action)>$ }; %printer { $$ - $<$1(DEAD %printer)>$ + $<$1(%printer)>$ } <> <*>; %lex-param { - $1(DEAD %lex-param) + $1(%lex-param) }; %parse-param { - $1(DEAD %parse-param) + $1(%parse-param) }; %% exp: @@ -2916,25 +2916,24 @@ exp: { $$; $][1; - $<$1(DEAD action 1)>$ - $<$1(DEAD action 2)>1 - $<$1(DEAD action 3)>name - $<$1(DEAD action 4)>0 + $<$1(action 1)>$ + $<$1(action 2)>1 + $<$1(action 3)>name + $<$1(action 4)>0 ; }; ]]) -# FIXME: Provide a means to iterate over all the skeletons. -AT_BISON_CHECK([[-d input.y]]) -AT_BISON_CHECK([[-d -S glr.c input.y]]) -AT_BISON_CHECK([[-d -S lalr1.cc input.y]]) -AT_BISON_CHECK([[-d -S glr.cc input.y]]) -AT_BISON_CHECK([[-d -S glr2.cc input.y]]) -AT_BISON_CHECK([[ -S lalr1.java input.y]]) +# Disable -Wother to avoid the warnings about the suspicious presence +# of `b4_canary` in the output. +m4_foreach([b4_skel], + [[yacc.c], [glr.c], [lalr1.cc], [glr.cc], [glr2.cc], [lalr1.d], [lalr1.java]], +[AT_BISON_CHECK([[-Wno-other -S ]b4_skel[ ]m4_bmatch(b4_skel, [.*\.java$], [], [-d])[ input.y]]) +]) ]) -AT_TEST([m4_errprintn]) -AT_TEST([@:>@m4_errprintn]) +AT_TEST([b4_canary]) +AT_TEST([@:>@b4_canary]) m4_popdef([AT_TEST]) |