diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-02-09 21:18:48 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-02-09 22:46:03 -0700 |
commit | 17a3df4c6a07533e2c03c46fdd27e3ee295d61d0 (patch) | |
tree | e0ee85739319695ef7c320b28890ad2f49ebade2 | |
parent | 3efe3cb8c0229e34f5e21774151ddbfdcf27adf4 (diff) | |
download | perl-17a3df4c6a07533e2c03c46fdd27e3ee295d61d0.tar.gz |
Fix up \cX for 5.14
Throughout 5.13 there was temporary code to deprecate and forbid
certain values of X following a \c in qq strings. This patch fixes
this to the final 5.14 semantics.
These are:
1) A utf8 non-ASCII character will croak. This is the same
behavior as pre-5.13, but it now gives a correct error message, rather than
the malformed utf8 message given previously.
2) \c{ and \cX where X is above ASCII will generate a deprecation
warning. The intent is to remove these capabilities in 5.16. The
original agreement was to croak on above ASCII, but that would violate
our stability policy, so I'm deprecating it instead.
3) A plain syntax warning (not a deprecation) is generated for all other \cX
that do not evaluate to a control character; this is the
same as throughout the 5.13 series.
I did not have the tuits to use \c{} as I had planned in 5.14, but \N{}
can be used instead.
-rw-r--r-- | dquote_static.c | 28 | ||||
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | pod/perldiag.pod | 21 | ||||
-rw-r--r-- | pod/perlop.pod | 3 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regcomp.c | 4 | ||||
-rw-r--r-- | t/lib/warnings/toke | 16 | ||||
-rw-r--r-- | t/op/qq.t | 22 | ||||
-rw-r--r-- | t/porting/diag.t | 1 | ||||
-rw-r--r-- | t/re/re_tests | 4 | ||||
-rw-r--r-- | toke.c | 2 |
12 files changed, 62 insertions, 45 deletions
diff --git a/dquote_static.c b/dquote_static.c index b5a2cccee6..e23ec4623a 100644 --- a/dquote_static.c +++ b/dquote_static.c @@ -39,26 +39,34 @@ S_regcurly(pTHX_ register const char *s) /* XXX Add documentation after final interface and behavior is decided */ /* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning) U8 source = *current; - - May want to add eg, WARN_REGEX */ STATIC char -S_grok_bslash_c(pTHX_ const char source, const bool output_warning) +S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning) { U8 result; - if (! isASCII(source)) { - Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII"); + if (utf8) { + /* Trying to deprecate non-ASCII usages. This construct has never + * worked for a utf8 variant. So, even though are accepting non-ASCII + * Latin1 in 5.14, no need to make them work under utf8 */ + if (! isASCII(source)) { + Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII"); + } } result = toCTRL(source); - if (! isCNTRL(result)) { + if (! isASCII(source)) { + Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), + "Character following \"\\c\" must be ASCII"); + } + else if (! isCNTRL(result) && output_warning) { if (source == '{') { - Perl_croak(aTHX_ "It is proposed that \"\\c{\" no longer be valid. It has historically evaluated to\n \";\". If you disagree with this proposal, send email to perl5-porters@perl.org\nOtherwise, or in the meantime, you can work around this failure by changing\n\"\\c{\" to \";\""); + Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), + "\"\\c{\" is deprecated and is more clearly written as \";\""); } - else if (output_warning) { + else { U8 clearer[3]; U8 i = 0; if (! 
isALNUM(result)) { @@ -67,8 +75,8 @@ S_grok_bslash_c(pTHX_ const char source, const bool output_warning) clearer[i++] = result; clearer[i++] = '\0'; - Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), - "\"\\c%c\" more clearly written simply as \"%s\"", + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "\"\\c%c\" is more clearly written simply as \"%s\"", source, clearer); } @@ -657,7 +657,7 @@ p |OP* |localize |NN OP *o|I32 lex ApdR |I32 |looks_like_number|NN SV *const sv Apd |UV |grok_bin |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result #ifdef PERL_IN_DQUOTE_STATIC_C -EXMsR |char |grok_bslash_c |const char source|const bool output_warning +EXMsR |char |grok_bslash_c |const char source|const bool utf8|const bool output_warning EXMsR |bool |grok_bslash_o |NN const char* s|NN UV* uv|NN STRLEN* len|NN const char** error_msg|const bool output_warning #endif Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result @@ -859,7 +859,7 @@ # endif # endif # if defined(PERL_IN_DQUOTE_STATIC_C) -#define grok_bslash_c(a,b) S_grok_bslash_c(aTHX_ a,b) +#define grok_bslash_c(a,b,c) S_grok_bslash_c(aTHX_ a,b,c) #define grok_bslash_o(a,b,c,d,e) S_grok_bslash_o(aTHX_ a,b,c,d,e) #define regcurly(a) S_regcurly(aTHX_ a) # endif diff --git a/pod/perldiag.pod b/pod/perldiag.pod index bf22e1e511..3d35b1ce6f 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1258,7 +1258,12 @@ Perhaps you need to copy the value to a temporary, and repeat that. =item Character following "\c" must be ASCII -(F) In C<\cI<X>>, I<X> must be an ASCII character. +(F|W deprecated, syntax) In C<\cI<X>>, I<X> must be an ASCII character. +It is planned to make this fatal in all instances in Perl 5.16. In the +cases where it isn't fatal, the character this evaluates to is +derived by exclusive or'ing the code point of this character with 0x40. + +Note that non-alphabetic ASCII characters are discouraged here as well. 
=item Character in 'C' format wrapped in pack @@ -1498,12 +1503,20 @@ valid magic number. you have also specified an explicit size for the string. See L<perlfunc/pack>. -=item "\c%c" more clearly written simply as "%s" +=item "\c{" is deprecated and is more clearly written as ";" + +(D deprecated, syntax) The C<\cI<X>> construct is intended to be a way +to specify non-printable characters. You used it with a "{" which +evaluates to ";", which is printable. It is planned to remove the +ability to specify a semi-colon this way in Perl 5.16. Just use a +semi-colon or a backslash-semi-colon without the "\c". + +=item "\c%c" is more clearly written simply as "%s" -(D deprecated) The C<\cI<X>> construct is intended to be a way to specify +(W syntax) The C<\cI<X>> construct is intended to be a way to specify non-printable characters. You used it for a printable one, which is better written as simply itself, perhaps preceded by a backslash for non-word -characters. This message may not remain as Deprecated beyond 5.13. +characters. =item Deep recursion on subroutine "%s" diff --git a/pod/perlop.pod b/pod/perlop.pod index eb71b89a83..e11fff2019 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1114,7 +1114,8 @@ L<perlebcdic/OPERATOR DIFFERENCES> for the complete list of what these sequences mean on both ASCII and EBCDIC platforms. Use of any other character following the "c" besides those listed above is -discouraged, and may become deprecated or forbidden. What happens for those +discouraged, and some are deprecated with the intention of removing +those in Perl 5.16. What happens for any of these other characters currently though, is that the value is derived by inverting the 7th bit (0x40). 
@@ -5182,7 +5182,7 @@ STATIC I32 S_do_trans_simple_utf8(pTHX_ SV * const sv) #endif #if defined(PERL_IN_DQUOTE_STATIC_C) -STATIC char S_grok_bslash_c(pTHX_ const char source, const bool output_warning) +STATIC char S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning) __attribute__warn_unused_result__; STATIC bool S_grok_bslash_o(pTHX_ const char* s, UV* uv, STRLEN* len, const char** error_msg, const bool output_warning) @@ -8522,7 +8522,7 @@ tryagain: break; case 'c': p++; - ender = grok_bslash_c(*p++, SIZE_ONLY); + ender = grok_bslash_c(*p++, UTF, SIZE_ONLY); break; case '0': case '1': case '2': case '3':case '4': case '5': case '6': case '7': case '8':case '9': @@ -9287,7 +9287,7 @@ parseit: goto recode_encoding; break; case 'c': - value = grok_bslash_c(*RExC_parse++, SIZE_ONLY); + value = grok_bslash_c(*RExC_parse++, UTF, SIZE_ONLY); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index f37c36e7df..425b613485 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -940,3 +940,19 @@ Use of ?PATTERN? without explicit operator is deprecated at - line 4. yes no no +######## +# toke.c +use warnings; +my $a = "\c{ack}"; +$a = "\c,"; +$a = "\c`"; +no warnings 'syntax'; +$a = "\c{ack}"; +$a = "\c,"; +$a = "\c`"; +no warnings 'deprecated'; +EXPECT +"\c{" is deprecated and is more clearly written as ";" at - line 3. +"\c," is more clearly written simply as "l" at - line 4. +"\c`" is more clearly written simply as "\ " at - line 5. +"\c{" is deprecated and is more clearly written as ";" at - line 7. @@ -5,7 +5,7 @@ BEGIN { @INC = '../lib'; } -print q(1..30 +print q(1..28 ); # This is() function is written to avoid "" @@ -68,26 +68,6 @@ is ("a\o{120}b", "a" . chr(0x50) . "b"); is ("a\o{400}b", "a" . chr(0x100) . "b"); is ("a\o{1000}b", "a" . chr(0x200) . 
"b"); -# These kludged tests should change when we remove the temporary fatal error -# in util.c for "\c{". And, the warning there should probably not be -# deprecated; See [perl #75138]. -# BE SURE TO remove the message from the __DATA__ section of porting/diag.t, -# and to verify the messages in util.c are adequately covered in -# perldiag.pod, and to revise the explanatory wording that is there now. -my $value = eval '"\c{ACK}"'; -if ($^V lt v5.13.0 || $^V ge v5.14.0) { - is ($@, ""); - is ($value, ";ACK}"); -} -elsif ($@ ne "") { # 5.13 series, expect the eval to fail, so pass it. - is ("1", "1"); # This .t only has 'is' at its disposal - is ("1", "1"); -} -else { # Something wrong; someone has removed the failure in util.c - is ("Should fail for 5.13 until fix test", "0"); - is ("1", "1"); -} - # This caused a memory fault no warnings "utf8"; is ("abc", eval qq[qq\x{8000_0000}abc\x{8000_0000}]) diff --git a/t/porting/diag.t b/t/porting/diag.t index 1bedf73aa6..f001d94ed1 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -432,7 +432,6 @@ Invalid type '%c' in pack Invalid type '%c' in %s Invalid type '%c' in unpack Invalid type ',' in %s -It is proposed that "\c{" no longer be valid. It has historically evaluated to ";". If you disagree with this proposal, send email to perl5-porters@perl.org Otherwise, or in the meantime, you can work around this failure by changing "\c{" to ";" 'j' not supported on this platform 'J' not supported on this platform leaving effective gid failed diff --git a/t/re/re_tests b/t/re/re_tests index bc4a7fc7c7..c80667fee1 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1451,8 +1451,8 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer # Verify works in single quotish context; regex compiler delivers slightly different msg # \N{U+BEEF.BEAD} succeeds here, because can't completely hide it from the outside. 
\N{U+0xBEEF} - c - Invalid hexadecimal number -\c` - c - \"\\c`\" more clearly written simply as \"\\ \" -\c1 - c - \"\\c1\" more clearly written simply as \"q\" +\c` - c - \"\\c`\" is more clearly written simply as \"\\ \" +\c1 - c - \"\\c1\" is more clearly written simply as \"q\" \cA \001 y $& \1 \400 \x{100} y $& \x{100} @@ -3363,7 +3363,7 @@ S_scan_const(pTHX_ char *start) case 'c': s++; if (s < send) { - *d++ = grok_bslash_c(*s++, 1); + *d++ = grok_bslash_c(*s++, has_utf8, 1); } else { yyerror("Missing control char name in \\c"); |