summaryrefslogtreecommitdiff
path: root/toke.c
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2021-10-15 19:59:34 +0000
committerNicholas Clark <nick@ccl4.org>2021-10-19 06:09:02 +0000
commitbd09be5b1b65f68dd41433db47acee101aa69f7b (patch)
treeaebeb3770c02120cf13091e7d02e15e6713550e8 /toke.c
parent65b7cc064468ea58fd900e8293dfb0fdd1de8092 (diff)
downloadperl-bd09be5b1b65f68dd41433db47acee101aa69f7b.tar.gz
`for my($k, $v) (%hash)` should not be a syntax error
No-one had thought to test for this explicitly. After all, both `for my $foo ...` and `for my$foo ...` are valid syntax, so tweaking the C lexer code to add an optional '(' should work, surely? The problem was that, *as was*, the lexer code didn't "accept" those two syntax variants the way the comments would suggest. `for my $foo ...` was treated as 1) we saw 'my ' 2) we saw the dollar sign 3) success! but `for my$foo ...` was treated as 0) we didn't see 'my ' or 'our ' 1) we saw the literal string 'my' which is valid as a package name 2) we saw the dollar sign 3) success! ie some sort of mangled variant of `for my Dog $spot ...` without 'my' *but* as the lexer was happy with what it saw, it returned that the input stream was valid for a "for" token, and control continues to the grammar. The grammar, of course, didn't make these mistakes, so parsed everything properly and built the correct optree. (And, if presented with `for Dog $spot (...)` the grammar wouldn't match, so all invalid code was correctly rejected) However, all this came unstuck with `for my($k` because that didn't mis-tokenise as some crazy package name 'my(', so it reported a syntax error. Hence rewrite yyl_foreach() to actually tokenise everything correctly. "Correctly", to be clear, is bug-for-bug compatible with the current emergent behaviour for various corner cases for "parses and runs" vs "syntax error". We don't always report identical error messages for certain syntax errors, where the precise message reported was itself emergent behaviour from the bugs in the previous implementation.
Diffstat (limited to 'toke.c')
-rw-r--r--toke.c78
1 files changed, 57 insertions, 21 deletions
diff --git a/toke.c b/toke.c
index f3ce92f644..aa27c44950 100644
--- a/toke.c
+++ b/toke.c
@@ -6931,32 +6931,68 @@ yyl_foreach(pTHX_ char *s)
if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char *p = s;
SSize_t s_off = s - SvPVX(PL_linestr);
- STRLEN len;
- bool permit_paren = FALSE;
-
- if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
- p += 2;
- permit_paren = TRUE;
+ bool paren_is_valid = FALSE;
+ bool maybe_package = FALSE;
+ bool saw_core = FALSE;
+ bool core_valid = FALSE;
+
+ if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
+ saw_core = TRUE;
+ p += 6;
+ }
+ if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
+ core_valid = TRUE;
+ paren_is_valid = TRUE;
+ if (isSPACE(p[2])) {
+ p = skipspace(p + 3);
+ maybe_package = TRUE;
+ }
+ else {
+ p += 2;
+ }
}
- else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
- p += 3;
+ else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
+ core_valid = TRUE;
+ if (isSPACE(p[3])) {
+ p = skipspace(p + 4);
+ maybe_package = TRUE;
+ }
+ else {
+ p += 3;
+ }
}
-
- p = skipspace(p);
- /* skip optional package name, as in "for my abc $x (..)" */
- if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
- permit_paren = FALSE;
- p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
- p = skipspace(p);
+ else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
+ core_valid = TRUE;
+ if (isSPACE(p[5])) {
+ p = skipspace(p + 6);
+ }
+ else {
+ p += 5;
+ }
}
- if (*p != '$' && *p != '\\') {
- if (permit_paren && *p == '(') {
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__FOR_LIST), "for my (...) is experimental");
- } else {
- Perl_croak(aTHX_ "Missing $ on loop variable");
+ if (saw_core && !core_valid) {
+ Perl_croak(aTHX_ "Missing $ on loop variable");
+ }
+
+ if (maybe_package && !saw_core) {
+ /* skip optional package name, as in "for my abc $x (..)" */
+ if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
+ STRLEN len;
+ p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
+ p = skipspace(p);
+ paren_is_valid = FALSE;
}
}
+
+ if (UNLIKELY(paren_is_valid && *p == '(')) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__FOR_LIST),
+ "for my (...) is experimental");
+ }
+ else if (UNLIKELY(*p != '$' && *p != '\\')) {
+ /* "for myfoo (" will end up here, but with p pointing at the 'f' */
+ Perl_croak(aTHX_ "Missing $ on loop variable");
+ }
/* The buffer may have been reallocated, update s */
s = SvPVX(PL_linestr) + s_off;
}