summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--t/comp/parser_run.t39
-rw-r--r--t/re/pat.t30
-rw-r--r--toke.c91
3 files changed, 119 insertions, 41 deletions
diff --git a/t/comp/parser_run.t b/t/comp/parser_run.t
index 1f90e2b15e..6f2b41eff2 100644
--- a/t/comp/parser_run.t
+++ b/t/comp/parser_run.t
@@ -10,7 +10,7 @@ BEGIN {
set_up_inc( qw(. ../lib ) );
}
-plan(7);
+plan(70);
# [perl #130814] can reallocate lineptr while looking ahead for
# "Missing $ on loop variable" diagnostic.
@@ -22,11 +22,38 @@ is($result . "\n", <<EXPECT);
Identifier too long at - line 2.
EXPECT
-fresh_perl_is(<<'EOS', <<'EXPECT', {}, "check zero vars");
-print $001;
-EOS
-Numeric variables with more than one digit may not start with '0' at - line 1.
-EXPECT
+for my $var ('$00','${00}','$001','${001}','$01','${01}','$09324', '${09324}') {
+ for my $utf8 ("","use utf8;") {
+ for my $strict ("","use strict;") {
+ fresh_perl_is(
+ "${strict}${utf8}print $var;",
+ "Numeric variables with more than one digit may not start with '0' at - line 1.",
+ {},
+ sprintf("check %s is illegal%s%s", $var,
+ $utf8 ? " under utf8" : "",
+ $strict ? " under strict" : ""
+ ),
+ );
+ }
+ }
+}
+
+for my $var ('$0', '${0}', '$1', '${1}', '$10', '${10}', '$9324', '${9324}') {
+ for my $utf8 ("","use utf8;") {
+ for my $strict ("","use strict;") {
+ fresh_perl_is(
+ "${strict}${utf8} print '$var' if $var or !$var;",
+ $var,
+ {},
+ sprintf("check %s is legal%s%s", $var,
+ $utf8 ? " under utf8" : "",
+ $strict ? " under strict" : ""
+ )
+ );
+ }
+ }
+}
+
fresh_perl_is(<<EOS, <<'EXPECT', {}, "linestart before bufptr");
\${ \xB6eeeeeeeeeeee
diff --git a/t/re/pat.t b/t/re/pat.t
index 7292e4625a..2ce4ca8764 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -27,7 +27,7 @@ skip_all_without_unicode_tables();
my $has_locales = locales_enabled('LC_CTYPE');
-plan tests => 1046; # Update this when adding/deleting tests.
+plan tests => 1214; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -35,6 +35,34 @@ run_tests() unless caller;
# Tests start here.
#
sub run_tests {
+ {
+ # see https://github.com/Perl/perl5/issues/12948
+ my $string="ABCDEFGHIJKL";
+ my $pat= "(.)" x length($string);
+ my $ok= $string=~/^$pat\z/;
+ foreach my $n (1 .. length($string)) {
+ $ok= eval sprintf 'is $%d, "%s", q($%d = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok($ok, "eval for \$$n test");
+ $ok= eval sprintf 'is ${%d}, "%s", q(${%d} = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok($ok, "eval for \${$n} test");
+
+ $ok= eval sprintf 'is $0%d, "%s", q($0%d = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok(!$ok, "eval failed as expected for \$0$n test");
+ $ok= eval sprintf 'is ${0%d}, "%s", q(${0%d} = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok(!$ok, "eval failed as expected for \${0$n} test");
+
+ no strict 'refs';
+ $ok= eval sprintf 'is ${0b%b}, "%s", q(${0b%b} = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok($ok, sprintf "eval for \${0b%b} test", $n);
+ $ok= eval sprintf 'is ${0x%x}, "%s", q(${0x%x} = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok($ok, sprintf "eval for \${0x%x} test", $n);
+ $ok= eval sprintf 'is ${0b%08b}, "%s", q(${0b%08b} = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok($ok, sprintf "eval for \${0b%b} test", $n);
+ $ok= eval sprintf 'is ${0x%04x}, "%s", q(${0x%04x} = %s); 1', ($n, substr($string,$n-1,1))x2;
+ ok($ok, sprintf "eval for \${0x%04x} test", $n);
+ }
+ }
+
my $sharp_s = uni_to_native("\xdf");
{
diff --git a/toke.c b/toke.c
index a3740bb55d..d7f27a48de 100644
--- a/toke.c
+++ b/toke.c
@@ -10076,25 +10076,14 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR
return s;
}
-/* Is the byte 'd' a legal single character identifier name? 'u' is true
- * iff Unicode semantics are to be used. The legal ones are any of:
- * a) all ASCII characters except:
- * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
- * 2) '{'
- * The final case currently doesn't get this far in the program, so we
- * don't test for it. If that were to change, it would be ok to allow it.
- * b) When not under Unicode rules, any upper Latin1 character
- * c) Otherwise, when unicode rules are used, all XIDS characters.
- *
- * Because all ASCII characters have the same representation whether
- * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not. */
-#define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8_safe(s, e) \
- : (isGRAPH_L1(*s) \
- && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+/* scan s and extract an identifier ($var) from it if possible
+ * into dest.
+ * XXX: This function has subtle implications on parsing, and
+ * changing how it behaves can cause a variable to change from
+ * being a run time rv2sv call or a compile time binding to a
+ * specific variable name.
+ */
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
{
@@ -10158,11 +10147,41 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
s = skipspace(s);
}
}
+
+
+ /* Extract the first character of the variable name from 's' and
+ * copy it, null terminated into 'd'. Note that this does not
+ * involve checking for just IDFIRST characters, as it allows the
+ * '^' for ${^FOO} type variable names, and it allows all the
+ * characters that are legal in a single character variable name.
+ *
+ * The legal ones are any of:
+ * a) all ASCII characters except:
+ * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
+ * 2) '{'
+ * The final case currently doesn't get this far in the program, so we
+ * don't test for it. If that were to change, it would be ok to allow it.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ * Because all ASCII characters have the same representation whether
+ * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ * '{' without knowing if is UTF-8 or not. */
+
if ((s <= PL_bufend - ((is_utf8)
? UTF8SKIP(s)
: 1))
- && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
- {
+ && (
+ isGRAPH_A(*s)
+ ||
+ ( is_utf8
+ ? isIDFIRST_utf8_safe(s, PL_bufend)
+ : (isGRAPH_L1(*s)
+ && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
+ )
+ )
+ )
+ ){
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
@@ -10172,24 +10191,27 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
}
else {
*d = *s++;
- /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
- if (isDIGIT(*d)) {
- bool is_zero= *d == '0' ? TRUE : FALSE;
- char *digit_start= d;
- while (s < PL_bufend && isDIGIT(*s)) {
- d++;
- if (d >= e)
- Perl_croak(aTHX_ "%s", ident_too_long);
- *d= *s++;
- }
- if (is_zero && d - digit_start > 1)
- Perl_croak(aTHX_ ident_var_zero_multi_digit);
- }
d[1] = '\0';
}
}
+
+ /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
+ if (isDIGIT(*d)) {
+ bool is_zero= *d == '0' ? TRUE : FALSE;
+ char *digit_start= d;
+ while (s < PL_bufend && isDIGIT(*s)) {
+ d++;
+ if (d >= e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ *d= *s++;
+ }
+ if (is_zero && d - digit_start >= 1) /* d points at the last digit */
+ Perl_croak(aTHX_ ident_var_zero_multi_digit);
+ d[1] = '\0';
+ }
+
/* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
- if (*d == '^' && *s && isCONTROLVAR(*s)) {
+ else if (*d == '^' && *s && isCONTROLVAR(*s)) {
*d = toCTRL(*s);
s++;
}
@@ -10198,6 +10220,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
about when not to warn. */
else if (ck_uni && bracket == -1)
check_uni();
+
if (bracket != -1) {
bool skip;
char *s2;