From 609122bd053b77225626a0b7630039ddc39620ae Mon Sep 17 00:00:00 2001 From: Karl Date: Tue, 13 Jan 2009 17:51:53 -0700 Subject: Add warning about octal > 377 in some regexes (Tweaked by rgs) Message-ID: <496D3F02.6020204@khwilliamson.com> --- pod/perldiag.pod | 9 +++++++++ regcomp.c | 13 +++++++++++++ t/op/pat.t | 9 ++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index e06dd5e2f6..cb82633194 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4777,6 +4777,15 @@ In code that currently says C<use AutoLoader; @ISA = qw(AutoLoader);> you should remove AutoLoader from @ISA and change C<use AutoLoader;> to C<use AutoLoader 'AUTOLOAD';>. +=item Use of octal value above 377 is deprecated + +(D deprecated, W regexp) There is a constant in the regular expression whose +value is interpreted by Perl as octal and larger than 377 (255 decimal, 0xFF +hex). Perl may take this to mean different things depending on the rest of +the regular expression. If you meant such an octal value, convert it to +hexadecimal and use C<\xHH> or C<\x{HH}> instead. If you meant to have +part of it mean a backreference, use C<\g> for that. See L<perlre>. + =item Use of %s in printf format not supported (F) You attempted to use a feature of printf that is accessible from diff --git a/regcomp.c b/regcomp.c index 3ad1f3b07d..e06152820c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -7430,6 +7430,19 @@ tryagain: I32 flags = 0; STRLEN numlen = 3; ender = grok_oct(p, &numlen, &flags, NULL); + + /* An octal above 0xff is interpreted differently + * depending on if the re is in utf8 or not. If it + * is in utf8, the value will be itself, otherwise + * it is interpreted as modulo 0x100. It has been + * decided to discourage the use of octal above the + * single-byte range. For now, warn only when + * it ends up modulo */ + if (SIZE_ONLY && ender >= 0x100 + && ! UTF && ! 
PL_encoding + && ckWARN2(WARN_DEPRECATED, WARN_REGEXP)) { + vWARNdep(p, "Use of octal value above 377 is deprecated"); + } p += numlen; } else { diff --git a/t/op/pat.t b/t/op/pat.t index 586b31788f..eb1034a77d 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -13,7 +13,7 @@ sub run_tests; $| = 1; -my $EXPECTED_TESTS = 3961; # Update this when adding/deleting tests. +my $EXPECTED_TESTS = 3962; # Update this when adding/deleting tests. BEGIN { chdir 't' if -d 't'; @@ -3978,6 +3978,13 @@ sub run_tests { iseq $te [0], '../'; } + # This currently has to come before any "use encoding" in this file. + { + local $Message; + local $BugId = '59342'; + must_warn 'qr/\400/', '^Use of octal value above 377'; + } + SKIP: { # XXX: This set of tests is essentially broken, POSIX character classes -- cgit v1.2.1