summaryrefslogtreecommitdiff
path: root/perly.y
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2012-08-08 00:36:57 -0700
committerFather Chrysostomos <sprout@cpan.org>2012-08-08 12:24:52 -0700
commit705fe0e5f8a324e10c292190237cac35c5af4109 (patch)
treedb09024bcb99fdc42022347ec8e508f5ca2de30b /perly.y
parent35f7559499c4a614ddae483553149a29d9c78c13 (diff)
downloadperl-705fe0e5f8a324e10c292190237cac35c5af4109.tar.gz
Don’t let format arguments ‘leak out’ of formline
When parsing formats, the lexer invents tokens to feed to the parser. So when the lexer dissects this: format = @<<<< @>>>> $foo, $bar, $baz . The parser actually sees this (the parser knows that = . is like { }): format = ; formline "@<<<< @>>>>\n", $foo, $bar, $baz; . The lexer makes no effort to make sure that the argument line is contained within formline’s arguments. To make { do_stuff; $foo, bar } work, the lexer supplies a ‘do’ before the block, if it is inside a format. This means that $a, $b; $c, $d feeds ($a, $b) to formline, whereas { $a, $b; $c, $d } feeds ($c, $d) to formline. It also has various other strange effects: This script prints "# 0" as I would expect: print "# "; format = @ (0 and die) . write This one, lacking parentheses, dies because ‘and’ has low precedence: print "# "; format = @ 0 and die . write This does not work: my $day = "Wed"; format = @<<<<<<<<<< ({qw[ Sun 0 Mon 1 Tue 2 Wed 3 Thu 4 Fri 5 Sat 6 ]}->{$day}) . write You have to do this: my $day = "Wed"; format = @<<<<<<<<<< ({my %d = qw[ Sun 0 Mon 1 Tue 2 Wed 3 Thu 4 Fri 5 Sat 6 ]; \%d}->{$day}) . write which is very strange and shouldn’t even be valid syntax. This does not work, because ‘no’ is not allowed in an expression: use strict; $::foo = "bar" format = @<<<<<<<<<<< no strict; $foo . write; Putting a block around it makes it work. Putting a semicolon before ‘no’ stops it from being a syntax error, but it silently does the wrong thing. I thought I could fix all these by putting an implicit do { ... } around the argument line and removing the special-casing for an opening brace, allowing anonymous hashrefs to work in formats, such that this: format = @<<<< @>>>> $foo, $bar, $baz . would turn into this: format = ; formline "@<<<< @>>>>\n", do { $foo, $bar, $baz; }; . But that will lead to madness like this ‘working’: format = @ }+do{ . It would also stop lexicals declared in one format line from being visible in another. 
So instead this commit starts being honest with the parser. We still have some ‘invented’ tokens, to indicate the start and end of a format line, but now it is the parser itself that understands a sequence of format lines, instead of being fed generated code. So the example above is now presented to the parser like this: format = ; FORMRBRACK "@<<<< @>>>>\n" FORMLBRACK $foo, $bar, $baz ; FORMRBRACK ; . Note about the semicolons: The parser expects to see a semicolon at the end of each statement. So the lexer has to supply one before FORMRBRACK. The final dot goes through the same code that handles closing braces, which generates a semicolon for the same reason. It’s easier to make the parser expect a semicolon before the final dot than to change the } code in the lexer. We use the } code for . because it handles the internal variables that keep track of how many nested levels there are, what kind, etc. The extra ;FORMRBRACK after the = is there also to keep the lexer simple (ahem). When a newline is encountered during ‘normal’ (as opposed to format picture) parsing inside a format, that’s when the semicolon and FORMRBRACK are emitted. (There was already a semicolon there before this commit. I have just added FORMRBRACK in the same spot.)
Diffstat (limited to 'perly.y')
-rw-r--r--perly.y50
1 files changed, 46 insertions, 4 deletions
diff --git a/perly.y b/perly.y
index b9bc0eb859..04170b32f6 100644
--- a/perly.y
+++ b/perly.y
@@ -85,7 +85,7 @@
%token <i_tkval> RELOP EQOP MULOP ADDOP
%token <i_tkval> DOLSHARP DO HASHBRACK NOAMP
%token <i_tkval> LOCAL MY MYSUB REQUIRE
-%token <i_tkval> COLONATTR
+%token <i_tkval> COLONATTR FORMLBRACK FORMRBRACK
%type <ival> grammar remember mremember
%type <ival> startsub startanonsub startformsub
@@ -99,6 +99,7 @@
%type <opval> formname subname proto subbody cont my_scalar formblock
%type <opval> subattrlist myattrlist myattrterm myterm
%type <opval> termbinop termunop anonymous termdo
+%type <opval> formstmtseq formline formarg
%nonassoc <i_tkval> PREC_LOW
%nonassoc LOOPEX
@@ -212,12 +213,12 @@ block : '{' remember stmtseq '}'
;
/* format body: the '=' ... '.' delimited body of a format, closed like a block via block_end() */
-formblock: '=' remember stmtseq '.'
+formblock: '=' remember ';' FORMRBRACK formstmtseq ';' '.' /* the ';' FORMRBRACK after '=' and the ';' before '.' are supplied by the lexer */
{ if (PL_parser->copline > (line_t)IVAL($1))
PL_parser->copline = (line_t)IVAL($1);
- $$ = block_end($2, $3);
+ $$ = block_end($2, $5); /* $5 is formstmtseq, the sequence of format lines */
TOKEN_GETMAD($1,$$,'{');
- TOKEN_GETMAD($4,$$,'}');
+ TOKEN_GETMAD($7,$$,'}'); /* $7 is the closing '.' */
}
;
@@ -249,6 +250,17 @@ stmtseq : /* NULL */
}
;
+/* A sequence of format lines: zero or more formline rules, appended in order into one OP_LINESEQ list */
+formstmtseq: /* NULL */
+ { $$ = (OP*)NULL; } /* empty sequence produces no ops */
+ | formstmtseq formline
+ { $$ = op_append_list(OP_LINESEQ, $1, $2); /* $1 = lines so far, $2 = the new line */
+ PL_pad_reset_pending = TRUE;
+ if ($1 && $2) /* only when both the accumulated sequence and the new line are non-NULL */
+ PL_hints |= HINT_BLOCK_SCOPE;
+ }
+ ;
+
/* A statement in the program, including optional labels */
fullstmt: barestmt
{
@@ -506,6 +518,36 @@ barestmt: PLUGSTMT
}
;
+/* Format line: a THING (the picture string) optionally followed by a formarg, built into an OP_FORMLINE statement */
+formline: THING formarg
+ { OP *list; /* operand list handed to OP_FORMLINE */
+ if ($2) { /* an argument line was supplied */
+ OP *term = $2;
+ DO_MAD(term = newUNOP(OP_NULL, 0, term)); /* NOTE(review): presumably only takes effect in MAD builds - confirm */
+ list = op_append_elem(OP_LIST, $1, term); /* picture first, then the arguments */
+ }
+ else { /* no arguments: the list is just the picture, plus an empty list op under MAD */
+#ifdef MAD
+ OP *op = newNULLLIST();
+ list = op_append_elem(OP_LIST, $1, op);
+#else
+ list = $1;
+#endif
+ }
+ if (PL_parser->copline == NOLINE) /* copline unset: use the line before the current cop's line */
+ PL_parser->copline = CopLINE(PL_curcop)-1;
+ else PL_parser->copline--; /* otherwise step the recorded line back by one */
+ $$ = newSTATEOP(0, NULL,
+ convert(OP_FORMLINE, 0, list)); /* turn the list into an OP_FORMLINE and wrap it as a statement */
+ }
+ ;
+
+formarg : /* NULL */
+ { $$ = NULL; } /* no argument line; formline tests for this NULL */
+ | FORMLBRACK stmtseq FORMRBRACK /* argument statements delimited by the lexer-invented bracket tokens */
+ { $$ = op_unscope($2); } /* result is op_unscope() applied to the statement sequence */
+ ;
+
/* An expression which may have a side-effect */
sideff : error
{ $$ = (OP*)NULL; }