diff options
-rw-r--r-- | dquote_static.c | 29 | ||||
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | pod/perldelta.pod | 5 | ||||
-rw-r--r-- | pod/perldiag.pod | 9 | ||||
-rw-r--r-- | proto.h | 7 | ||||
-rw-r--r-- | regcomp.c | 34 | ||||
-rw-r--r-- | t/re/reg_mesg.t | 4 | ||||
-rw-r--r-- | toke.c | 8 |
9 files changed, 96 insertions, 3 deletions
diff --git a/dquote_static.c b/dquote_static.c index 61845ccc92..5a22993ac0 100644 --- a/dquote_static.c +++ b/dquote_static.c @@ -297,6 +297,35 @@ S_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg, return TRUE; } +STATIC char* +S_form_short_octal_warning(pTHX_ + const char * const s, /* Points to first non-octal */ + const STRLEN len /* Length of octals string, so + (s-len) points to first + octal */ +) { + /* Return a character string consisting of a warning message for when a + * string constant in octal is weird, like "\078". */ + + const char * sans_leading_zeros = s - len; + + PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING; + + assert(*s == '8' || *s == '9'); + + /* Remove the leading zeros, retaining one zero so won't be zero length */ + while (*sans_leading_zeros == '0') sans_leading_zeros++; + if (sans_leading_zeros == s) { + sans_leading_zeros--; + } + + return Perl_form(aTHX_ + "'%.*s' resolved to '\\o{%.*s}%c'", + (int) (len + 2), s - len - 1, + (int) (s - sans_leading_zeros), sans_leading_zeros, + *s); +} + /* * Local variables: * c-indentation-style: bsd @@ -751,6 +751,8 @@ EMiR |bool |grok_bslash_x |NN char** s|NN UV* uv \ |const bool strict \ |const bool silence_non_portable \ |const bool utf8 +EMsPR |char*|form_short_octal_warning|NN const char * const s \ + |const STRLEN len #endif Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep @@ -961,6 +961,7 @@ #define _core_swash_init(a,b,c,d,e,f,g) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g) # endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) +#define form_short_octal_warning(a,b) S_form_short_octal_warning(aTHX_ a,b) #define grok_bslash_c(a,b,c) S_grok_bslash_c(aTHX_ a,b,c) #define grok_bslash_o(a,b,c,d,e,f,g) S_grok_bslash_o(aTHX_ a,b,c,d,e,f,g) #define grok_bslash_x(a,b,c,d,e,f,g) S_grok_bslash_x(aTHX_ a,b,c,d,e,f,g) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index ea4db738fb..a43cf72071 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -210,8 +210,13 @@ XXX L<message|perldiag/"message"> =item * +L<'%s' resolved to '\o{%s}%d'|perldiag/"'%s' resolved to '\o{%s}%d'"> + +=item * + XXX L<message|perldiag/"message"> + =back =head2 Changes to Existing Diagnostics diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 9e6ee34ece..2be0f791ae 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4313,6 +4313,15 @@ terminates. You might use ^# instead. See L<perlform>. search list. So the additional elements in the replacement list are meaningless. +=item '%s' resolved to '\o{%s}%d' + +(W misc, regexp) You wrote something like C<\08>, or C<\179> in a +double-quotish string. All but the last digit is treated as a single +character, specified in octal. The last digit is the next character in +the string. To tell Perl that this is indeed what you want, you can use +the C<\o{ }> syntax, or use exactly three digits to specify the octal +for the character. + =item Reversed %s= operator (W syntax) You wrote your assignment operator backwards. The = must @@ -6788,6 +6788,13 @@ PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) +STATIC char* S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN len) + __attribute__warn_unused_result__ + __attribute__pure__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING \ + assert(s) + STATIC char S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning) __attribute__warn_unused_result__; @@ -535,6 +535,13 @@ static const scan_data_t zero_scan_data = Simple_vFAIL4(m, a1, a2, a3); \ } STMT_END +/* m is not necessarily a "literal string", in this macro */ +#define reg_warn_non_literal_string(loc, m) STMT_START { \ + const IV offset = loc - RExC_precomp; \ + Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \ + m, (int)offset, RExC_precomp, RExC_precomp + offset); \ +} STMT_END + #define ckWARNreg(loc,m) STMT_START { \ const IV offset = loc - RExC_precomp; \ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \ @@ -10700,6 +10707,15 @@ tryagain: REQUIRE_UTF8; } p += numlen; + if (SIZE_ONLY /* like \08, \178 */ + && numlen < 3 + && p < RExC_end + && isDIGIT(*p) && ckWARN(WARN_REGEXP)) + { + reg_warn_non_literal_string( + p + 1, + form_short_octal_warning(p, numlen)); + } } else { /* Not to be treated as an octal constant, go find backref */ @@ -12166,11 +12182,25 @@ parseit: numlen = (strict) ? 4 : 3; value = grok_oct(--RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; - if (strict) { - if (numlen != 3) { + if (numlen != 3) { + SAVEFREESV(listsv); /* In case warnings are fatalized */ + if (strict) { RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1; vFAIL("Need exactly 3 octal digits"); } + else if (! SIZE_ONLY /* like \08, \178 */ + && numlen < 3 + && RExC_parse < RExC_end + && isDIGIT(*RExC_parse) + && ckWARN(WARN_REGEXP)) + { + SAVEFREESV(RExC_rx_sv); + reg_warn_non_literal_string( + RExC_parse + 1, + form_short_octal_warning(RExC_parse, numlen)); + (void)ReREFCNT_inc(RExC_rx_sv); + } + SvREFCNT_inc_simple_void_NN(listsv); } if (PL_encoding && value < 0x100) goto recode_encoding; diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 7487421e28..14e9aceee2 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -165,6 +165,10 @@ my @warning = ( 'm/[\pM-x]/' => 'False [] range "\pM-" in regex; marked by {#} in m/[\pM-{#}x]/', "m'\\y'" => 'Unrecognized escape \y passed through in regex; marked by {#} in m/\y{#}/', '/x{3,1}/' => 'Quantifier {n,m} with n > m can\'t match in regex; marked by {#} in m/x{3,1}{#}/', + '/\08/' => '\'\08\' resolved to \'\o{0}8\' in regex; marked by {#} in m/\08{#}/', + '/\018/' => '\'\018\' resolved to \'\o{1}8\' in regex; marked by {#} in m/\018{#}/', + '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' in regex; marked by {#} in m/[\08{#}]/', + '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' in regex; marked by {#} in m/[\018{#}]/', '/(?[ \t ])/' => 'The regex_sets feature is experimental in regex; marked by {#} in m/(?[{#} \t ])/', ); @@ -3277,10 +3277,16 @@ S_scan_const(pTHX_ char *start) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { - I32 flags = 0; + I32 flags = PERL_SCAN_SILENT_ILLDIGIT; STRLEN len = 3; uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL)); s += len; + if (len < 3 && s < send && isDIGIT(*s) + && ckWARN(WARN_MISC)) + { + Perl_warner(aTHX_ packWARN(WARN_MISC), + "%s", form_short_octal_warning(s, len)); + } } goto NUM_ESCAPE_INSERT; |