summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkim Demaille <demaille@gostai.com>2008-10-23 20:01:48 -0500
committerAkim Demaille <demaille@gostai.com>2008-11-15 14:30:05 +0100
commitcb823b6f0c5d025e3667f994222037476c41ea1a (patch)
tree6386f30270f5acaaa2d48297855c237b3ebd6a5a
parent7b6e67533e54ba06ffe69664dee57a39021d2b8d (diff)
downloadbison-cb823b6f0c5d025e3667f994222037476c41ea1a.tar.gz
Support parametric types.
There are two issues to handle: first scanning nested angle bracket pairs to support types such as std::pair< std::string, std::list<std::string> >. Another issue is to address idiosyncrasies of C++: do not glue two closing angle brackets together (otherwise it's operator>>), and avoid blindly sticking a TYPE to the opening <, as it can result in '<:' which is a digraph for '['. * src/scan-gram.l (braces_level): Rename as... (nesting): this. (SC_TAG): New. Implement support for complex tags. (tag): Accept \n, but not <. * data/lalr1.cc (b4_symbol_value, b4_symbol_value_template) (b4_symbol_variant): Leave space around types as parameters. * examples/variant.yy: Use nested template types and leading ::. * src/parse-gram.y (TYPE, TYPE_TAG_ANY, TYPE_TAG_NONE, type.opt): Rename as... (TAG, TAG_ANY, TAG_NONE, tag.opt): these. * tests/c++.at: Test parametric types.
-rw-r--r--ChangeLog25
-rw-r--r--data/lalr1.cc6
-rw-r--r--examples/variant.yy9
-rw-r--r--src/parse-gram.y30
-rw-r--r--src/scan-gram.l80
-rw-r--r--tests/c++.at24
6 files changed, 127 insertions, 47 deletions
diff --git a/ChangeLog b/ChangeLog
index 70fd84d8..4543fdd4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2008-11-15 Akim Demaille <demaille@gostai.com>
+
+ Support parametric types.
+ There are two issues to handle: first scanning nested angle bracket pairs
+ to support types such as std::pair< std::string, std::list<std::string> > >.
+
+ Another issue is to address idiosyncracies of C++: do not glue two closing
+ angle brackets together (otherwise it's operator>>), and avoid sticking
+ blindly a TYPE to the opening <, as it can result in '<:' which is a
+ digraph for '['.
+
+ * src/scan-gram.l (brace_level): Rename as...
+ (nesting): this.
+ (SC_TAG): New.
+ Implement support for complex tags.
+ (tag): Accept \n, but not <.
+ * data/lalr1.cc (b4_symbol_value, b4_symbol_value_template)
+ (b4_symbol_variant): Leave space around types as parameters.
+ * examples/variant.yy: Use nested template types and leading ::.
+ * src/parse-gram.y (TYPE, TYPE_TAG_ANY, TYPE_TAG_NONE, type.opt):
+ Rename as...
+ (TAG, TAG_ANY, TAG_NONE, tag.opt): these.
+ * tests/c++.at: Test parametric types.
+
2008-11-15 Akim Demaille <akim@betelgeuse.gostai.ensta.fr>
Test token.prefix.
diff --git a/data/lalr1.cc b/data/lalr1.cc
index c3678887..6e5042d4 100644
--- a/data/lalr1.cc
+++ b/data/lalr1.cc
@@ -72,7 +72,7 @@ b4_variant_if([
# ----------------------------
m4_define([b4_symbol_value],
[m4_ifval([$2],
- [$1.as<$2>()],
+ [$1.as< $2 >()],
[$1])])
# b4_symbol_value_template(VAL, [TYPE])
@@ -80,7 +80,7 @@ b4_variant_if([
# Same as b4_symbol_value, but used in a template method.
m4_define([b4_symbol_value_template],
[m4_ifval([$2],
- [$1.template as<$2>()],
+ [$1.template as< $2 >()],
[$1])])
]) # b4_variant_if
@@ -366,7 +366,7 @@ m4_map([b4_symbol_constructor_definition_], m4_defn([b4_symbol_numbers]))])])
# YYTYPE.
m4_define([b4_symbol_variant],
[m4_pushdef([b4_dollar_dollar],
- [$2.$3<$][3>(m4_shift3($@))])dnl
+ [$2.$3< $][3 >(m4_shift3($@))])dnl
switch ($1)
{
m4_map([b4_type_action_], m4_defn([b4_type_names]))[]dnl
diff --git a/examples/variant.yy b/examples/variant.yy
index 016c4b2a..fe5dce0f 100644
--- a/examples/variant.yy
+++ b/examples/variant.yy
@@ -46,13 +46,14 @@ typedef std::list<std::string> strings_type;
}
}
-%token <std::string> TEXT;
+%token <::std::string> TEXT;
%token <int> NUMBER;
-%printer { debug_stream () << $$; } <int> <std::string> <strings_type>;
+%printer { debug_stream () << $$; }
+ <int> <::std::string> <::std::list<std::string>>;
%token END_OF_FILE 0;
-%type <std::string> item;
-%type <strings_type> list;
+%type <::std::string> item;
+%type <::std::list<std::string>> list;
%%
diff --git a/src/parse-gram.y b/src/parse-gram.y
index 8e3b7320..cada04f7 100644
--- a/src/parse-gram.y
+++ b/src/parse-gram.y
@@ -167,9 +167,9 @@ static int current_prec = 0;
%token PIPE "|"
%token PROLOGUE "%{...%}"
%token SEMICOLON ";"
-%token TYPE "type"
-%token TYPE_TAG_ANY "<*>"
-%token TYPE_TAG_NONE "<>"
+%token TAG "<tag>"
+%token TAG_ANY "<*>"
+%token TAG_NONE "<>"
%type <character> CHAR
%printer { fputs (char_name ($$), stderr); } CHAR
@@ -183,8 +183,8 @@ static int current_prec = 0;
%printer { fprintf (stderr, "{\n%s\n}", $$); }
braceless content.opt "{...}" "%{...%}" EPILOGUE
-%type <uniqstr> TYPE ID ID_COLON variable
-%printer { fprintf (stderr, "<%s>", $$); } TYPE
+%type <uniqstr> TAG ID ID_COLON variable
+%printer { fprintf (stderr, "<%s>", $$); } TAG
%printer { fputs ($$, stderr); } ID variable
%printer { fprintf (stderr, "%s:", $$); } ID_COLON
@@ -387,7 +387,7 @@ symbol_declaration:
current_class = unknown_sym;
current_type = NULL;
}
-| "%type" TYPE symbols.1
+| "%type" TAG symbols.1
{
symbol_list *list;
tag_seen = true;
@@ -398,7 +398,7 @@ symbol_declaration:
;
precedence_declaration:
- precedence_declarator type.opt symbols.prec
+ precedence_declarator tag.opt symbols.prec
{
symbol_list *list;
++current_prec;
@@ -419,9 +419,9 @@ precedence_declarator:
| "%precedence" { $$ = precedence_assoc; }
;
-type.opt:
+tag.opt:
/* Nothing. */ { current_type = NULL; }
-| TYPE { current_type = $1; tag_seen = true; }
+| TAG { current_type = $1; tag_seen = true; }
;
/* Just like symbols.1 but accept INT for the sake of POSIX. */
@@ -451,15 +451,15 @@ generic_symlist:
;
generic_symlist_item:
- symbol { $$ = symbol_list_sym_new ($1, @1); }
-| TYPE { $$ = symbol_list_type_new ($1, @1); }
-| "<*>" { $$ = symbol_list_default_tagged_new (@1); }
-| "<>" { $$ = symbol_list_default_tagless_new (@1); }
+ symbol { $$ = symbol_list_sym_new ($1, @1); }
+| TAG { $$ = symbol_list_type_new ($1, @1); }
+| "<*>" { $$ = symbol_list_default_tagged_new (@1); }
+| "<>" { $$ = symbol_list_default_tagless_new (@1); }
;
/* One token definition. */
symbol_def:
- TYPE
+ TAG
{
current_type = $1;
tag_seen = true;
@@ -538,7 +538,7 @@ rhs:
{ grammar_current_rule_prec_set ($3, @3); }
| rhs "%dprec" INT
{ grammar_current_rule_dprec_set ($3, @3); }
-| rhs "%merge" TYPE
+| rhs "%merge" TAG
{ grammar_current_rule_merge_set ($3, @3); }
;
diff --git a/src/scan-gram.l b/src/scan-gram.l
index 7ea0d685..5ef8edba 100644
--- a/src/scan-gram.l
+++ b/src/scan-gram.l
@@ -78,6 +78,8 @@ static void unexpected_newline (boundary, char const *);
/* A identifier was just read in directives/rules. Special state
to capture the sequence `identifier :'. */
%x SC_AFTER_IDENTIFIER
+ /* A complex tag, with nested angles brackets. */
+%x SC_TAG
/* Three types of user code:
- prologue (code between `%{' `%}' in the first section, before %%);
@@ -96,8 +98,10 @@ int [0-9]+
/* POSIX says that a tag must be both an id and a C union member, but
historically almost any character is allowed in a tag. We disallow
- NUL and newline, as this simplifies our implementation. */
-tag [^\0\n>]+
+ NUL, as this simplifies our implementation. We disallow angle
+ bracket to match them in nested pairs: several languages use them
+ for generics/template types. */
+tag [^\0<>]+
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */
@@ -105,8 +109,9 @@ splice (\\[ \f\t\v]*\n)*
%%
%{
- /* Nesting level of the current code in braces. */
- int braces_level IF_LINT (= 0);
+ /* Nesting level. Either for nested braces, or nested angle brackets
+ (but not mixed). */
+ int nesting IF_LINT (= 0);
/* Parent context state, when applicable. */
int context_state IF_LINT (= 0);
@@ -205,8 +210,6 @@ splice (\\[ \f\t\v]*\n)*
"=" return EQUAL;
"|" return PIPE;
";" return SEMICOLON;
- "<*>" return TYPE_TAG_ANY;
- "<>" return TYPE_TAG_NONE;
{id} {
val->uniqstr = uniqstr_new (yytext);
@@ -235,18 +238,25 @@ splice (\\[ \f\t\v]*\n)*
/* Code in between braces. */
"{" {
STRING_GROW;
- braces_level = 0;
+ nesting = 0;
code_start = loc->start;
BEGIN SC_BRACED_CODE;
}
/* A type. */
+ "<*>" return TAG_ANY;
+ "<>" return TAG_NONE;
"<"{tag}">" {
obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
STRING_FINISH;
val->uniqstr = uniqstr_new (last_string);
STRING_FREE;
- return TYPE;
+ return TAG;
+ }
+ "<" {
+ nesting = 0;
+ token_start = loc->start;
+ BEGIN SC_TAG;
}
"%%" {
@@ -267,6 +277,17 @@ splice (\\[ \f\t\v]*\n)*
}
+ /*--------------------------------------------------------------.
+ | Supporting \0 complexifies our implementation for no expected |
+ | added value. |
+ `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+ \0 complain_at (*loc, _("invalid null character"));
+}
+
+
/*-----------------------------------------------------------------.
| Scanning after an identifier, checking whether a colon is next. |
`-----------------------------------------------------------------*/
@@ -386,11 +407,40 @@ splice (\\[ \f\t\v]*\n)*
}
}
-<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
+ /*-----------------------------------------------------------.
+ | Scanning a Bison nested tag. The initial angle bracket is |
+ | already eaten. |
+ `-----------------------------------------------------------*/
+
+<SC_TAG>
{
- \0 complain_at (*loc, _("invalid null character"));
-}
+ ">" {
+ --nesting;
+ if (nesting < 0)
+ {
+ STRING_FINISH;
+ loc->start = token_start;
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ BEGIN INITIAL;
+ return TAG;
+ }
+ STRING_GROW;
+ }
+
+ [^<>]+ STRING_GROW;
+ "<"+ STRING_GROW; nesting += yyleng;
+ <<EOF>> {
+ unexpected_eof (token_start, ">");
+ STRING_FINISH;
+ loc->start = token_start;
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ BEGIN INITIAL;
+ return TAG;
+ }
+}
/*----------------------------.
| Decode escaped characters. |
@@ -509,13 +559,13 @@ splice (\\[ \f\t\v]*\n)*
<SC_BRACED_CODE>
{
- "{"|"<"{splice}"%" STRING_GROW; braces_level++;
- "%"{splice}">" STRING_GROW; braces_level--;
+ "{"|"<"{splice}"%" STRING_GROW; nesting++;
+ "%"{splice}">" STRING_GROW; nesting--;
"}" {
obstack_1grow (&obstack_for_string, '}');
- --braces_level;
- if (braces_level < 0)
+ --nesting;
+ if (nesting < 0)
{
STRING_FINISH;
loc->start = code_start;
diff --git a/tests/c++.at b/tests/c++.at
index d236f619..0cfcaf05 100644
--- a/tests/c++.at
+++ b/tests/c++.at
@@ -51,7 +51,7 @@ typedef std::list<std::string> strings_type;
#include <iterator>
#include <sstream>
- static
+ static
#if defined USE_LEX_SYMBOL
yy::parser::symbol_type yylex ();
#else
@@ -86,26 +86,30 @@ typedef std::list<std::string> strings_type;
%token <std::string> TEXT;
%token <int> NUMBER;
-%printer { debug_stream() << $][$; } <int> <std::string> <strings_type>;
%token END_OF_FILE 0;
%type <std::string> item;
-%type <strings_type> list result;
+// Using the template type to exercize its parsing.
+// Starting with :: to ensure we don't output "<::" which starts by the
+// digraph for the left square bracket.
+%type <::std::list<std::string>> list result;
+%printer { debug_stream() << $][$; }
+ <int> <::std::string> <::std::list<::std::string>>;
%%
result:
- list { std::cout << $][1; }
+ list { std::cout << $][1; }
;
list:
- /* nothing */ { /* Generates an empty string list */ }
-| list item { std::swap($][$,$][1); $$.push_back($][2); }
+ /* nothing */ { /* Generates an empty string list */ }
+| list item { std::swap($][$,$][1); $$.push_back($][2); }
;
item:
- TEXT { std::swap($][$,$][1); }
-| NUMBER { $][$ = string_cast($][1); }
+ TEXT { std::swap($][$,$][1); }
+| NUMBER { $][$ = string_cast($][1); }
;
%%
@@ -164,7 +168,7 @@ yy::parser::token_type yylex(yy::parser::semantic_type* yylval,
void
yy::parser::error(const yy::parser::location_type&,
- const std::string& message)
+ const std::string& message)
{
std::cerr << message << std::endl;
}
@@ -363,5 +367,5 @@ AT_CHECK_NAMESPACE([[foo: :bar]], [[-]])
# contains single occurrences of `:'.
AT_CHECK_NAMESPACE([[foo[3]::bar::baz]], [[-]])
AT_CHECK_NAMESPACE([[foo::bar,baz]], [[-]])
-AT_CHECK_NAMESPACE([[foo::bar::(baz]], [[-]])
+AT_CHECK_NAMESPACE([[foo::bar::(baz /* Pacify Emacs ) */]], [[-]])
AT_CLEANUP