summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2021-10-15 19:59:34 +0000
committerNicholas Clark <nick@ccl4.org>2021-10-19 06:09:02 +0000
commitbd09be5b1b65f68dd41433db47acee101aa69f7b (patch)
treeaebeb3770c02120cf13091e7d02e15e6713550e8
parent65b7cc064468ea58fd900e8293dfb0fdd1de8092 (diff)
downloadperl-bd09be5b1b65f68dd41433db47acee101aa69f7b.tar.gz
`for my($k, $v) (%hash)` should not be a syntax error
No-one had thought to test for this explicitly. After all, both `for my $foo ...` and `for my$foo ...` are valid syntax, so tweaking the C lexer code to add an optional '(' should work, surely? The problem was that, *as was*, the lexer code didn't "accept" those two syntax variants the way the comments would suggest. `for my $foo ...` was treated as 1) we saw 'my ' 2) we saw the dollar sign 3) success! but `for my$foo ...` was treated as 0) we didn't see 'my ' or 'our ' 1) we saw the literal string 'my' which is valid as a package name 2) we saw the dollar sign 3) success! ie some sort of mangled variant of `for my Dog $spot ...` without 'my' *but* as the lexer was happy with what it saw, it returned that the input stream was valid for a "for" token, and control continues to the grammar. The grammar, of course, didn't make these mistakes, so parsed everything properly and built the correct optree. (And, if presented with `for Dog $spot (...)` the grammar wouldn't match, so all invalid code was correctly rejected) However, all this came unstuck with `for my($k` because that didn't mis-tokenise as some crazy package name 'my(', so it reported a syntax error. Hence rewrite yyl_foreach() to actually tokenise everything correctly. "Correctly", to be clear, is bug-for-bug compatible with the current emergent behaviour for various corner cases for "parses and runs" vs "syntax error". We don't always report identical error messages for certain syntax errors, where the precise message reported was itself emergent behaviour from the bugs in the previous implementation.
-rw-r--r--t/comp/parser.t22
-rw-r--r--t/comp/parser_run.t6
-rw-r--r--t/op/for-many.t35
-rw-r--r--toke.c78
4 files changed, 112 insertions, 29 deletions
diff --git a/t/comp/parser.t b/t/comp/parser.t
index fbfe5398e3..8f4a484756 100644
--- a/t/comp/parser.t
+++ b/t/comp/parser.t
@@ -8,7 +8,7 @@ BEGIN {
chdir 't' if -d 't';
}
-print "1..190\n";
+print "1..191\n";
sub failed {
my ($got, $expected, $name) = @_;
@@ -334,15 +334,29 @@ like($@, qr/BEGIN failed--compilation aborted/, 'BEGIN 7' );
like($@, qr/Identifier too long/, "too long id in glob ctx");
eval qq[ for $xFC ];
- like($@, qr/Missing \$ on loop variable/,
+ like($@, qr/^Missing \$ on loop variable /,
"252 char id ok, but a different error");
eval qq[ for $xFD; ];
- like($@, qr/Identifier too long/, "too long id in for ctx");
+ like($@, qr/^Missing \$ on loop variable /, "too long id in for ctx");
# the specific case from the ticket
+ # however the parsing code in yyl_foreach has now changed
my $x = "x" x 257;
eval qq[ for $x ];
- like($@, qr/Identifier too long/, "too long id ticket case");
+ like($@, qr/^Missing \$ on loop variable /, "too long id ticket case");
+
+ # as PL_tokenbuf is now PL_parser->tokenbuf, the "buffer overflow" that was
+ # reported in GH #9993 now corrupts some other part of the parser structure.
+ # Currently, that seems to be the line number. Hence this test will fail if
+ # the fix from commit 0b3da58dfdc35079 is reversed. (However, as the later
+ # commit 61bc22580524a6d9 changed the code (now) in yyl_foreach() from
+ # scan_ident() to scan_word(), to recreate the problem one needs to apply
+ # the buggy change to the calculation of the variable `e` in scan_word()
+ # instead.
+
+ my $x = "x" x 260;
+ eval qq[ for my $x \$foo ];
+ like($@, qr/at \(eval \d+\) line 1[,.]/, "line number is reported correctly");
}
{
diff --git a/t/comp/parser_run.t b/t/comp/parser_run.t
index b36b5ad164..1f90e2b15e 100644
--- a/t/comp/parser_run.t
+++ b/t/comp/parser_run.t
@@ -15,13 +15,11 @@ plan(7);
# [perl #130814] can reallocate lineptr while looking ahead for
# "Missing $ on loop variable" diagnostic.
my $result = fresh_perl(
- " foreach m0\n\$" . ("v" x 0x2000),
+ " foreach my\n\$" . ("v" x 0x2000),
{ stderr => 1 },
);
is($result . "\n", <<EXPECT);
-syntax error at - line 3, near "foreach m0
-"
-Identifier too long at - line 3.
+Identifier too long at - line 2.
EXPECT
fresh_perl_is(<<'EOS', <<'EXPECT', {}, "check zero vars");
diff --git a/t/op/for-many.t b/t/op/for-many.t
index ccf61631c9..71e80b6ffa 100644
--- a/t/op/for-many.t
+++ b/t/op/for-many.t
@@ -500,4 +500,39 @@ is($continue, 'xx', 'continue reached twice');
is("@have", 'alpaca;guanaco llama;vicuña', 'comma test 42');
}
+# Spaces shouldn't trigger parsing errors:
+{
+ my @correct = ('Pointy', 'Up', 'Flamey', 'Down');
+
+ @have = ();
+
+ for my ($one) (@correct) {
+ push @have, $one;
+ }
+ is("@have", "@correct", 'for my ($one)');
+
+ @have = ();
+
+ for my($one) (@correct) {
+ push @have, $one;
+ }
+ is("@have", "@correct", 'for my($one)');
+
+ @have = ();
+
+ # This is lots of lovely whitespace:
+ for my
+ ($end, $orientation) (@correct) {
+ push @have, "$end end $orientation";
+ }
+ is("@have", "Pointy end Up Flamey end Down", 'for my ($one, $two)');
+
+ @have = ();
+
+ for my($end, $orientation) (@correct) {
+ push @have, "$end end $orientation";
+ }
+ is("@have", "Pointy end Up Flamey end Down", 'for my ($one, $two)');
+}
+
done_testing();
diff --git a/toke.c b/toke.c
index f3ce92f644..aa27c44950 100644
--- a/toke.c
+++ b/toke.c
@@ -6931,32 +6931,68 @@ yyl_foreach(pTHX_ char *s)
if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char *p = s;
SSize_t s_off = s - SvPVX(PL_linestr);
- STRLEN len;
- bool permit_paren = FALSE;
-
- if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
- p += 2;
- permit_paren = TRUE;
+ bool paren_is_valid = FALSE;
+ bool maybe_package = FALSE;
+ bool saw_core = FALSE;
+ bool core_valid = FALSE;
+
+ if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
+ saw_core = TRUE;
+ p += 6;
+ }
+ if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
+ core_valid = TRUE;
+ paren_is_valid = TRUE;
+ if (isSPACE(p[2])) {
+ p = skipspace(p + 3);
+ maybe_package = TRUE;
+ }
+ else {
+ p += 2;
+ }
}
- else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
- p += 3;
+ else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
+ core_valid = TRUE;
+ if (isSPACE(p[3])) {
+ p = skipspace(p + 4);
+ maybe_package = TRUE;
+ }
+ else {
+ p += 3;
+ }
}
-
- p = skipspace(p);
- /* skip optional package name, as in "for my abc $x (..)" */
- if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
- permit_paren = FALSE;
- p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
- p = skipspace(p);
+ else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
+ core_valid = TRUE;
+ if (isSPACE(p[5])) {
+ p = skipspace(p + 6);
+ }
+ else {
+ p += 5;
+ }
}
- if (*p != '$' && *p != '\\') {
- if (permit_paren && *p == '(') {
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__FOR_LIST), "for my (...) is experimental");
- } else {
- Perl_croak(aTHX_ "Missing $ on loop variable");
+ if (saw_core && !core_valid) {
+ Perl_croak(aTHX_ "Missing $ on loop variable");
+ }
+
+ if (maybe_package && !saw_core) {
+ /* skip optional package name, as in "for my abc $x (..)" */
+ if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
+ STRLEN len;
+ p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
+ p = skipspace(p);
+ paren_is_valid = FALSE;
}
}
+
+ if (UNLIKELY(paren_is_valid && *p == '(')) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__FOR_LIST),
+ "for my (...) is experimental");
+ }
+ else if (UNLIKELY(*p != '$' && *p != '\\')) {
+ /* "for myfoo (" will end up here, but with p pointing at the 'f' */
+ Perl_croak(aTHX_ "Missing $ on loop variable");
+ }
/* The buffer may have been reallocated, update s */
s = SvPVX(PL_linestr) + s_off;
}