diff options
-rw-r--r-- | t/comp/parser_run.t | 39 | ||||
-rw-r--r-- | t/re/pat.t | 30 | ||||
-rw-r--r-- | toke.c | 91 |
3 files changed, 119 insertions, 41 deletions
diff --git a/t/comp/parser_run.t b/t/comp/parser_run.t index 1f90e2b15e..6f2b41eff2 100644 --- a/t/comp/parser_run.t +++ b/t/comp/parser_run.t @@ -10,7 +10,7 @@ BEGIN { set_up_inc( qw(. ../lib ) ); } -plan(7); +plan(70); # [perl #130814] can reallocate lineptr while looking ahead for # "Missing $ on loop variable" diagnostic. @@ -22,11 +22,38 @@ is($result . "\n", <<EXPECT); Identifier too long at - line 2. EXPECT -fresh_perl_is(<<'EOS', <<'EXPECT', {}, "check zero vars"); -print $001; -EOS -Numeric variables with more than one digit may not start with '0' at - line 1. -EXPECT +for my $var ('$00','${00}','$001','${001}','$01','${01}','$09324', '${09324}') { + for my $utf8 ("","use utf8;") { + for my $strict ("","use strict;") { + fresh_perl_is( + "${strict}${utf8}print $var;", + "Numeric variables with more than one digit may not start with '0' at - line 1.", + {}, + sprintf("check %s is illegal%s%s", $var, + $utf8 ? " under utf8" : "", + $strict ? " under strict" : "" + ), + ); + } + } +} + +for my $var ('$0', '${0}', '$1', '${1}', '$10', '${10}', '$9324', '${9324}') { + for my $utf8 ("","use utf8;") { + for my $strict ("","use strict;") { + fresh_perl_is( + "${strict}${utf8} print '$var' if $var or !$var;", + $var, + {}, + sprintf("check %s is legal%s%s", $var, + $utf8 ? " under utf8" : "", + $strict ? " under strict" : "" + ) + ); + } + } +} + fresh_perl_is(<<EOS, <<'EXPECT', {}, "linestart before bufptr"); \${ \xB6eeeeeeeeeeee diff --git a/t/re/pat.t b/t/re/pat.t index 7292e4625a..2ce4ca8764 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -27,7 +27,7 @@ skip_all_without_unicode_tables(); my $has_locales = locales_enabled('LC_CTYPE'); -plan tests => 1046; # Update this when adding/deleting tests. +plan tests => 1214; # Update this when adding/deleting tests. run_tests() unless caller; @@ -35,6 +35,34 @@ run_tests() unless caller; # Tests start here. # sub run_tests { + { + # see https://github.com/Perl/perl5/issues/12948 + my $string="ABCDEFGHIJKL"; + my $pat= "(.)" x length($string); + my $ok= $string=~/^$pat\z/; + foreach my $n (1 .. length($string)) { + $ok= eval sprintf 'is $%d, "%s", q($%d = %s); 1', ($n, substr($string,$n-1,1))x2; + ok($ok, "eval for \$$n test"); + $ok= eval sprintf 'is ${%d}, "%s", q(${%d} = %s); 1', ($n, substr($string,$n-1,1))x2; + ok($ok, "eval for \${$n} test"); + + $ok= eval sprintf 'is $0%d, "%s", q($0%d = %s); 1', ($n, substr($string,$n-1,1))x2; + ok(!$ok, "eval failed as expected for \$0$n test"); + $ok= eval sprintf 'is ${0%d}, "%s", q(${0%d} = %s); 1', ($n, substr($string,$n-1,1))x2; + ok(!$ok, "eval failed as expected for \${0$n} test"); + + no strict 'refs'; + $ok= eval sprintf 'is ${0b%b}, "%s", q(${0b%b} = %s); 1', ($n, substr($string,$n-1,1))x2; + ok($ok, sprintf "eval for \${0b%b} test", $n); + $ok= eval sprintf 'is ${0x%x}, "%s", q(${0x%x} = %s); 1', ($n, substr($string,$n-1,1))x2; + ok($ok, sprintf "eval for \${0x%x} test", $n); + $ok= eval sprintf 'is ${0b%08b}, "%s", q(${0b%08b} = %s); 1', ($n, substr($string,$n-1,1))x2; + ok($ok, sprintf "eval for \${0b%b} test", $n); + $ok= eval sprintf 'is ${0x%04x}, "%s", q(${0x%04x} = %s); 1', ($n, substr($string,$n-1,1))x2; + ok($ok, sprintf "eval for \${0x%04x} test", $n); + } + } + my $sharp_s = uni_to_native("\xdf"); { @@ -10076,25 +10076,14 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR return s; } -/* Is the byte 'd' a legal single character identifier name? 'u' is true - * iff Unicode semantics are to be used. The legal ones are any of: - * a) all ASCII characters except: - * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; - * 2) '{' - * The final case currently doesn't get this far in the program, so we - * don't test for it. If that were to change, it would be ok to allow it. - * b) When not under Unicode rules, any upper Latin1 character - * c) Otherwise, when unicode rules are used, all XIDS characters. - * - * Because all ASCII characters have the same representation whether - * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and - * '{' without knowing if is UTF-8 or not. */ -#define VALID_LEN_ONE_IDENT(s, e, is_utf8) \ - (isGRAPH_A(*(s)) || ((is_utf8) \ - ? isIDFIRST_utf8_safe(s, e) \ - : (isGRAPH_L1(*s) \ - && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) +/* scan s and extract an identifier ($var) from it if possible + * into dest. + * XXX: This function has subtle implications on parsing, and + * changing how it behaves can cause a variable to change from + * being a run time rv2sv call or a compile time binding to a + * specific variable name. + */ STATIC char * S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) { @@ -10158,11 +10147,41 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) s = skipspace(s); } } + + + /* Extract the first character of the variable name from 's' and + * copy it, null terminated into 'd'. Note that this does not + * involve checking for just IDFIRST characters, as it allows the + * '^' for ${^FOO} type variable names, and it allows all the + * characters that are legal in a single character variable name. + * + * The legal ones are any of: + * a) all ASCII characters except: + * 1) control and space-type ones, like NUL, SOH, \t, and SPACE; + * 2) '{' + * The final case currently doesn't get this far in the program, so we + * don't test for it. If that were to change, it would be ok to allow it. + * b) When not under Unicode rules, any upper Latin1 character + * c) Otherwise, when unicode rules are used, all XIDS characters. + * + * Because all ASCII characters have the same representation whether + * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and + * '{' without knowing if is UTF-8 or not. */ + if ((s <= PL_bufend - ((is_utf8) ? UTF8SKIP(s) : 1)) - && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8)) - { + && ( + isGRAPH_A(*s) + || + ( is_utf8 + ? isIDFIRST_utf8_safe(s, PL_bufend) + : (isGRAPH_L1(*s) + && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD)) + ) + ) + ) + ){ if (is_utf8) { const STRLEN skip = UTF8SKIP(s); STRLEN i; @@ -10172,24 +10191,27 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) } else { *d = *s++; - /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ - if (isDIGIT(*d)) { - bool is_zero= *d == '0' ? TRUE : FALSE; - char *digit_start= d; - while (s < PL_bufend && isDIGIT(*s)) { - d++; - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - *d= *s++; - } - if (is_zero && d - digit_start > 1) - Perl_croak(aTHX_ ident_var_zero_multi_digit); - } d[1] = '\0'; } } + + /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ + if (isDIGIT(*d)) { + bool is_zero= *d == '0' ? TRUE : FALSE; + char *digit_start= d; + while (s < PL_bufend && isDIGIT(*s)) { + d++; + if (d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + *d= *s++; + } + if (is_zero && d - digit_start >= 1) /* d points at the last digit */ + Perl_croak(aTHX_ ident_var_zero_multi_digit); + d[1] = '\0'; + } + /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ - if (*d == '^' && *s && isCONTROLVAR(*s)) { + else if (*d == '^' && *s && isCONTROLVAR(*s)) { *d = toCTRL(*s); s++; } @@ -10198,6 +10220,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) about when not to warn. */ else if (ck_uni && bracket == -1) check_uni(); + if (bracket != -1) { bool skip; char *s2; |