diff options
author | David Mitchell <davem@iabyn.com> | 2015-06-19 12:47:05 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2015-06-19 12:47:05 +0100 |
commit | 33c28ab263ac8bba71954d61ec55d7f1dc6c0eca (patch) | |
tree | 96f97216db61bab1ff879fb662e18d1c64db471d | |
parent | 9558026484c47d197ababb92c9e5477b379f7c42 (diff) | |
download | perl-33c28ab263ac8bba71954d61ec55d7f1dc6c0eca.tar.gz |
remove deprecated /\C/ RE character class
This horrible thing broke encapsulation and was as buggy as a very buggy
thing. It's been officially deprecated since 5.20.0 and now it can finally
die die die!!!!
-rw-r--r-- | dump.c | 1 | ||||
-rw-r--r-- | ext/XS-APItest/t/callregexec.t | 6 | ||||
-rw-r--r-- | pod/perldebguts.pod | 1 | ||||
-rw-r--r-- | pod/perldelta.pod | 8 | ||||
-rw-r--r-- | pod/perldiag.pod | 17 | ||||
-rw-r--r-- | pod/perlre.pod | 5 | ||||
-rw-r--r-- | pod/perlrebackslash.pod | 14 | ||||
-rw-r--r-- | pod/perlreref.pod | 2 | ||||
-rw-r--r-- | pod/perlretut.pod | 4 | ||||
-rw-r--r-- | regcomp.c | 21 | ||||
-rw-r--r-- | regcomp.h | 3 | ||||
-rw-r--r-- | regcomp.sym | 1 | ||||
-rw-r--r-- | regexec.c | 47 | ||||
-rw-r--r-- | regnodes.h | 320 | ||||
-rw-r--r-- | t/lib/Cname.pm | 10 | ||||
-rw-r--r-- | t/op/bop.t | 36 | ||||
-rw-r--r-- | t/re/pat_advanced.t | 83 | ||||
-rw-r--r-- | t/re/pat_rt_report.t | 69 |
18 files changed, 185 insertions, 463 deletions
@@ -1415,7 +1415,6 @@ const struct flag_to_name regexp_core_intflags_names[] = { {PREGf_CUTGROUP_SEEN, "CUTGROUP_SEEN,"}, {PREGf_USE_RE_EVAL, "USE_RE_EVAL,"}, {PREGf_NOSCAN, "NOSCAN,"}, - {PREGf_CANY_SEEN, "CANY_SEEN,"}, {PREGf_GPOS_SEEN, "GPOS_SEEN,"}, {PREGf_GPOS_FLOAT, "GPOS_FLOAT,"}, {PREGf_ANCH_MBOL, "ANCH_MBOL,"}, diff --git a/ext/XS-APItest/t/callregexec.t b/ext/XS-APItest/t/callregexec.t index 74e1e206df..22446b66f5 100644 --- a/ext/XS-APItest/t/callregexec.t +++ b/ext/XS-APItest/t/callregexec.t @@ -10,7 +10,7 @@ use strict; use XS::APItest; *callregexec = *XS::APItest::callregexec; -use Test::More tests => 50; +use Test::More tests => 48; # Test that the regex engine can handle strings without terminating \0 # XXX This is by no means comprehensive; it doesn't test all ops, nor all @@ -42,10 +42,6 @@ sub try { try "ax", qr/a$/m, 1, 'MEOL'; try "ax", qr/a$/s, 1, 'SEOL'; try "abx", qr/^(ab|X)./s, 0, 'SANY'; - { - no warnings 'deprecated'; - try "abx", qr/^(ab|X)\C/, 0, 'CANY'; - } try "abx", qr/^(ab|X)./, 0, 'REG_ANY'; try "abx", qr/^ab(c|d|e|x)/, 0, 'TRIE/TRIEC'; try "abx", qr/^abx/, 0, 'EXACT'; diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index 2b5561d2d7..064af64b55 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -592,7 +592,6 @@ will be lost. # [Special] alternatives: REG_ANY no Match any one character (except newline). SANY no Match any one character. - CANY no Match any one byte. ANYOF sv 1 Match character in (or not in) this class, single char match only ANYOFL sv 1 Like ANYOF, but /l is in effect diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 2cb9d723cd..f7cd398586 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -45,7 +45,13 @@ XXX For a release on a stable branch, this section aspires to be: If any exist, they are bugs, and we request that you submit a report. See L</Reporting Bugs> below. -[ List each incompatible change as a =head2 entry ] +=head2 The C</\C/> character class has been removed. 
+ +This regular expression character class was deprecated in v5.20.0 and has +produced a deprecation warning since v5.22.0. It is now a compile-time +error. If you need to examine the individual bytes that make up a +UTF8-encoded character, then use C<utf8::encode()> on the string (or a +copy) first. =head1 Deprecations diff --git a/pod/perldiag.pod b/pod/perldiag.pod index ab94d597b7..1d53e5df92 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1559,15 +1559,14 @@ defined in the C<:alias> import argument to C<use charnames>, but they could be defined by a translator installed into C<$^H{charnames}>. See L<charnames/CUSTOM ALIASES>. -=item \C is deprecated in regex; marked by S<<-- HERE> in m/%s/ - -(D deprecated, regexp) The \C character class is deprecated, and will -become a compile-time error in a future release of perl (tentatively -v5.24). This construct allows you to match a single byte of what makes -up a multi-byte single UTF8 character, and breaks encapsulation. It is -currently also very buggy. If you really need to process the individual -bytes, you probably want to convert your string to one where each -underlying byte is stored as a character, with utf8::encode(). +=item \C no longer supported in regex; marked by S<<-- HERE> in m/%s/ + +(F) The \C character class used to allow a match of a single byte within a +multi-byte utf-8 character, but was removed in v5.24 as it broke +encapsulation and its implementation was extremely buggy. If you really +need to process the individual bytes, you probably want to convert your +string to one where each underlying byte is stored as a character, with +utf8::encode(). =item "\c%c" is more clearly written simply as "%s" diff --git a/pod/perlre.pod b/pod/perlre.pod index 2cf00fbd6d..a262b4cfc9 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -706,11 +706,6 @@ X<\g> X<\k> X<\K> X<backreference> \pP [3] Match P, named property. 
Use \p{Prop} for longer names \PP [3] Match non-P \X [4] Match Unicode "eXtended grapheme cluster" - \C Match a single C-language char (octet) even if that is - part of a larger UTF-8 character. Thus it breaks up - characters into their UTF-8 bytes, so you may end up - with malformed pieces of UTF-8. Unsupported in - lookbehind. (Deprecated.) \1 [5] Backreference to a specific capture group or buffer. '1' may actually be any positive integer. \g1 [5] Backreference to a specific or previous group, diff --git a/pod/perlrebackslash.pod b/pod/perlrebackslash.pod index c216f255e0..08b9abc431 100644 --- a/pod/perlrebackslash.pod +++ b/pod/perlrebackslash.pod @@ -69,8 +69,6 @@ as C<Not in [].> \b{}, \b Boundary. (\b is a backspace in []). \B{}, \B Not a boundary. Not in []. \cX Control-X. - \C Single octet, even under UTF-8. Not in []. - (Deprecated) \d Character class for digits. \D Character class for non-digits. \e Escape character. @@ -663,18 +661,6 @@ categories above. These are: =over 4 -=item \C - -(Deprecated.) C<\C> always matches a single octet, even if the source -string is encoded -in UTF-8 format, and the character to be matched is a multi-octet character. -This is very dangerous, because it violates -the logical character abstraction and can cause UTF-8 sequences to become malformed. - -Use C<utf8::encode()> instead. - -Mnemonic: oI<C>tet. - =item \K This appeared in perl 5.10.0. Anything matched left of C<\K> is diff --git a/pod/perlreref.pod b/pod/perlreref.pod index 848185e3a5..e9b784e445 100644 --- a/pod/perlreref.pod +++ b/pod/perlreref.pod @@ -144,8 +144,6 @@ and L<perlunicode> for details. \V A non vertical whitespace \R A generic newline (?>\v|\x0D\x0A) - \C Match a byte (with Unicode, '.' matches a character) - (Deprecated.) 
\pP Match P-named (Unicode) property \p{...} Match Unicode property with name longer than 1 character \PP Match non-P diff --git a/pod/perlretut.pod b/pod/perlretut.pod index cb399ab631..9a3c696e0f 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -2295,10 +2295,6 @@ They evaluate true if the regexps do I<not> match: $x =~ /foo(?!baz)/; # matches, 'baz' doesn't follow 'foo' $x =~ /(?<!\s)foo/; # matches, there is no \s before 'foo' -The C<\C> is unsupported in lookbehind, because the already -treacherous definition of C<\C> would become even more so -when going backwards. - Here is an example where a string containing blank-separated words, numbers and single dashes is to be split into its components. Using C</\s+/> alone won't work, because spaces are not required between @@ -808,9 +808,6 @@ static const scan_data_t zero_scan_data = if (RExC_seen & REG_GPOS_SEEN) \ PerlIO_printf(Perl_debug_log,"REG_GPOS_SEEN "); \ \ - if (RExC_seen & REG_CANY_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_CANY_SEEN "); \ - \ if (RExC_seen & REG_RECURSE_SEEN) \ PerlIO_printf(Perl_debug_log,"REG_RECURSE_SEEN "); \ \ @@ -5069,7 +5066,6 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n", Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); #endif - case CANY: case SANY: if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ ssc_match_all_cp(data->start_class); @@ -7288,8 +7284,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, lookbehind */ if (pRExC_state->num_code_blocks) r->extflags |= RXf_EVAL_SEEN; - if (RExC_seen & REG_CANY_SEEN) - r->intflags |= PREGf_CANY_SEEN; if (RExC_seen & REG_VERBARG_SEEN) { r->intflags |= PREGf_VERBARG_SEEN; @@ -7701,13 +7695,8 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, sv_setpvn(sv, s, i); TAINT_set(oldtainted); #endif - if ( (rx->intflags & PREGf_CANY_SEEN) - ? 
(RXp_MATCH_UTF8(rx) - && (!i || is_utf8_string((U8*)s, i))) - : (RXp_MATCH_UTF8(rx)) ) - { + if (RXp_MATCH_UTF8(rx)) SvUTF8_on(sv); - } else SvUTF8_off(sv); if (TAINTING_get) { @@ -11807,13 +11796,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) RExC_seen_zerolen++; /* Do not optimize RE away */ goto finish_meta_pat; case 'C': - ret = reg_node(pRExC_state, CANY); - RExC_seen |= REG_CANY_SEEN; - *flagp |= HASWIDTH|SIMPLE; - if (PASS2) { - ckWARNdep(RExC_parse+1, "\\C is deprecated"); - } - goto finish_meta_pat; + vFAIL("\\C no longer supported"); case 'X': ret = reg_node(pRExC_state, CLUMP); *flagp |= HASWIDTH; @@ -134,7 +134,7 @@ #define PREGf_USE_RE_EVAL 0x00000020 /* compiled with "use re 'eval'" */ /* these used to be extflags, but are now intflags */ #define PREGf_NOSCAN 0x00000040 -#define PREGf_CANY_SEEN 0x00000080 + /* spare */ #define PREGf_GPOS_SEEN 0x00000100 #define PREGf_GPOS_FLOAT 0x00000200 @@ -597,7 +597,6 @@ struct regnode_ssc { #define REG_LOOKBEHIND_SEEN 0x00000002 #define REG_GPOS_SEEN 0x00000004 /* spare */ -#define REG_CANY_SEEN 0x00000010 #define REG_RECURSE_SEEN 0x00000020 #define REG_TOP_LEVEL_BRANCHES_SEEN 0x00000040 #define REG_VERBARG_SEEN 0x00000080 diff --git a/regcomp.sym b/regcomp.sym index f79b87485c..ffcb53b21c 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -56,7 +56,6 @@ NBOUNDA NBOUND, no ; Match "" betweeen any \w\w or \W\W, where \w #* [Special] alternatives: REG_ANY REG_ANY, no 0 S ; Match any one character (except newline). SANY REG_ANY, no 0 S ; Match any one character. -CANY REG_ANY, no 0 S ; Match any one byte. 
ANYOF ANYOF, sv 1 S ; Match character in (or not in) this class, single char match only ANYOFL ANYOF, sv 1 S ; Like ANYOF, but /l is in effect @@ -770,9 +770,7 @@ Perl_re_intuit_start(pTHX_ * caller will have set strpos=pos()-4; we look for the substr * at position pos()-4+1, which lines up with the "a" */ - if (prog->check_offset_min == prog->check_offset_max - && !(prog->intflags & PREGf_CANY_SEEN)) - { + if (prog->check_offset_min == prog->check_offset_max) { /* Substring at constant offset from beg-of-str... */ SSize_t slen = SvCUR(check); char *s = HOP3c(strpos, prog->check_offset_min, strend); @@ -863,17 +861,10 @@ Perl_re_intuit_start(pTHX_ (IV)prog->check_end_shift); }); - if (prog->intflags & PREGf_CANY_SEEN) { - start_point= (U8*)(rx_origin + start_shift); - end_point= (U8*)(strend - end_shift); - if (start_point > end_point) - goto fail_finish; - } else { - end_point = HOP3(strend, -end_shift, strbeg); - start_point = HOPMAYBE3(rx_origin, start_shift, end_point); - if (!start_point) - goto fail_finish; - } + end_point = HOP3(strend, -end_shift, strbeg); + start_point = HOPMAYBE3(rx_origin, start_shift, end_point); + if (!start_point) + goto fail_finish; /* If the regex is absolutely anchored to either the start of the @@ -1841,14 +1832,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s)); } break; - case CANY: - REXEC_FBC_SCAN( - if (tmp && (reginfo->intuit || regtry(reginfo, &s))) - goto got_it; - else - tmp = doevery; - ); - break; case EXACTFA_NO_TRIE: /* This node only generated for non-utf8 patterns */ assert(! 
is_utf8_pat); @@ -3266,7 +3249,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, if (minlen) { const OPCODE op = OP(progi->regstclass); /* don't bother with what can't match */ - if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE) + if (PL_regkind[op] != EXACT && PL_regkind[op] != TRIE) strend = HOPc(strend, -(minlen - 1)); } DEBUG_EXECUTE_r({ @@ -3822,7 +3805,7 @@ S_dump_exec_pos(pTHX_ const char *locinput, if (pref0_len > pref_len) pref0_len = pref_len; { - const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0; + const int is_uni = utf8_target ? 1 : 0; RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0), (locinput - pref_len),pref0_len, 60, 4, 5); @@ -4986,12 +4969,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) sayNO; goto increment_locinput; - case CANY: /* \C */ - if (NEXTCHR_IS_EOS) - sayNO; - locinput++; - break; - case REG_ANY: /* /./ */ if ((NEXTCHR_IS_EOS) || nextchr == '\n') sayNO; @@ -8105,16 +8082,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, else scan = loceol; break; - case CANY: /* Move <scan> forward <max> bytes, unless goes off end */ - if (utf8_target && loceol - scan > max) { - - /* <loceol> hadn't been adjusted in the UTF-8 case */ - scan += max; - } - else { - scan = loceol; - } - break; case EXACTL: _CHECK_AND_WARN_PROBLEMATIC_LOCALE; if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) { diff --git a/regnodes.h b/regnodes.h index 3c9b991295..db32920c8c 100644 --- a/regnodes.h +++ b/regnodes.h @@ -6,8 +6,8 @@ /* Regops and State definitions */ -#define REGNODE_MAX 93 -#define REGMATCH_STATE_MAX 133 +#define REGNODE_MAX 92 +#define REGMATCH_STATE_MAX 132 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -29,82 +29,81 @@ #define NBOUNDA 15 /* 0x0f Match "" betweeen any \w\w or \W\W, where \w is [_a-zA-Z0-9] */ #define REG_ANY 16 /* 0x10 Match any one character (except newline). 
*/ #define SANY 17 /* 0x11 Match any one character. */ -#define CANY 18 /* 0x12 Match any one byte. */ -#define ANYOF 19 /* 0x13 Match character in (or not in) this class, single char match only */ -#define ANYOFL 20 /* 0x14 Like ANYOF, but /l is in effect */ -#define POSIXD 21 /* 0x15 Some [[:class:]] under /d; the FLAGS field gives which one */ -#define POSIXL 22 /* 0x16 Some [[:class:]] under /l; the FLAGS field gives which one */ -#define POSIXU 23 /* 0x17 Some [[:class:]] under /u; the FLAGS field gives which one */ -#define POSIXA 24 /* 0x18 Some [[:class:]] under /a; the FLAGS field gives which one */ -#define NPOSIXD 25 /* 0x19 complement of POSIXD, [[:^class:]] */ -#define NPOSIXL 26 /* 0x1a complement of POSIXL, [[:^class:]] */ -#define NPOSIXU 27 /* 0x1b complement of POSIXU, [[:^class:]] */ -#define NPOSIXA 28 /* 0x1c complement of POSIXA, [[:^class:]] */ -#define CLUMP 29 /* 0x1d Match any extended grapheme cluster sequence */ -#define BRANCH 30 /* 0x1e Match this alternative, or the next... */ -#define EXACT 31 /* 0x1f Match this string (preceded by length). */ -#define EXACTL 32 /* 0x20 Like EXACT, but /l is in effect. */ -#define EXACTF 33 /* 0x21 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */ -#define EXACTFL 34 /* 0x22 Match this string (not guaranteed to be folded) using /il rules (w/len). */ -#define EXACTFU 35 /* 0x23 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */ -#define EXACTFA 36 /* 0x24 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */ -#define EXACTFU_SS 37 /* 0x25 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */ -#define EXACTFLU8 38 /* 0x26 Rare cirucmstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. 
*/ -#define EXACTFA_NO_TRIE 39 /* 0x27 Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */ -#define NOTHING 40 /* 0x28 Match empty string. */ -#define TAIL 41 /* 0x29 Match empty string. Can jump here from outside. */ -#define STAR 42 /* 0x2a Match this (simple) thing 0 or more times. */ -#define PLUS 43 /* 0x2b Match this (simple) thing 1 or more times. */ -#define CURLY 44 /* 0x2c Match this simple thing {n,m} times. */ -#define CURLYN 45 /* 0x2d Capture next-after-this simple thing */ -#define CURLYM 46 /* 0x2e Capture this medium-complex thing {n,m} times. */ -#define CURLYX 47 /* 0x2f Match this complex thing {n,m} times. */ -#define WHILEM 48 /* 0x30 Do curly processing and see if rest matches. */ -#define OPEN 49 /* 0x31 Mark this point in input as start of #n. */ -#define CLOSE 50 /* 0x32 Analogous to OPEN. */ -#define REF 51 /* 0x33 Match some already matched string */ -#define REFF 52 /* 0x34 Match already matched string, folded using native charset rules for non-utf8 */ -#define REFFL 53 /* 0x35 Match already matched string, folded in loc. */ -#define REFFU 54 /* 0x36 Match already matched string, folded using unicode rules for non-utf8 */ -#define REFFA 55 /* 0x37 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */ -#define NREF 56 /* 0x38 Match some already matched string */ -#define NREFF 57 /* 0x39 Match already matched string, folded using native charset rules for non-utf8 */ -#define NREFFL 58 /* 0x3a Match already matched string, folded in loc. */ -#define NREFFU 59 /* 0x3b Match already matched string, folded using unicode rules for non-utf8 */ -#define NREFFA 60 /* 0x3c Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */ -#define LONGJMP 61 /* 0x3d Jump far away. */ -#define BRANCHJ 62 /* 0x3e BRANCH with long offset. */ -#define IFMATCH 63 /* 0x3f Succeeds if the following matches. 
*/ -#define UNLESSM 64 /* 0x40 Fails if the following matches. */ -#define SUSPEND 65 /* 0x41 "Independent" sub-RE. */ -#define IFTHEN 66 /* 0x42 Switch, should be preceded by switcher. */ -#define GROUPP 67 /* 0x43 Whether the group matched. */ -#define EVAL 68 /* 0x44 Execute some Perl code. */ -#define MINMOD 69 /* 0x45 Next operator is not greedy. */ -#define LOGICAL 70 /* 0x46 Next opcode should set the flag only. */ -#define RENUM 71 /* 0x47 Group with independently numbered parens. */ -#define TRIE 72 /* 0x48 Match many EXACT(F[ALU]?)? at once. flags==type */ -#define TRIEC 73 /* 0x49 Same as TRIE, but with embedded charclass data */ -#define AHOCORASICK 74 /* 0x4a Aho Corasick stclass. flags==type */ -#define AHOCORASICKC 75 /* 0x4b Same as AHOCORASICK, but with embedded charclass data */ -#define GOSUB 76 /* 0x4c recurse to paren arg1 at (signed) ofs arg2 */ -#define GOSTART 77 /* 0x4d recurse to start of pattern */ -#define NGROUPP 78 /* 0x4e Whether the group matched. */ -#define INSUBP 79 /* 0x4f Whether we are in a specific recurse. */ -#define DEFINEP 80 /* 0x50 Never execute directly. */ -#define ENDLIKE 81 /* 0x51 Used only for the type field of verbs */ -#define OPFAIL 82 /* 0x52 Same as (?!) */ -#define ACCEPT 83 /* 0x53 Accepts the current matched string. */ -#define VERB 84 /* 0x54 Used only for the type field of verbs */ -#define PRUNE 85 /* 0x55 Pattern fails at this startpoint if no-backtracking through this */ -#define MARKPOINT 86 /* 0x56 Push the current location for rollback by cut. */ -#define SKIP 87 /* 0x57 On failure skip forward (to the mark) before retrying */ -#define COMMIT 88 /* 0x58 Pattern fails outright if backtracking through this */ -#define CUTGROUP 89 /* 0x59 On failure go to the next alternation in the group */ -#define KEEPS 90 /* 0x5a $& begins here. */ -#define LNBREAK 91 /* 0x5b generic newline pattern */ -#define OPTIMIZED 92 /* 0x5c Placeholder for dump. */ -#define PSEUDO 93 /* 0x5d Pseudo opcode for internal use. 
*/ +#define ANYOF 18 /* 0x12 Match character in (or not in) this class, single char match only */ +#define ANYOFL 19 /* 0x13 Like ANYOF, but /l is in effect */ +#define POSIXD 20 /* 0x14 Some [[:class:]] under /d; the FLAGS field gives which one */ +#define POSIXL 21 /* 0x15 Some [[:class:]] under /l; the FLAGS field gives which one */ +#define POSIXU 22 /* 0x16 Some [[:class:]] under /u; the FLAGS field gives which one */ +#define POSIXA 23 /* 0x17 Some [[:class:]] under /a; the FLAGS field gives which one */ +#define NPOSIXD 24 /* 0x18 complement of POSIXD, [[:^class:]] */ +#define NPOSIXL 25 /* 0x19 complement of POSIXL, [[:^class:]] */ +#define NPOSIXU 26 /* 0x1a complement of POSIXU, [[:^class:]] */ +#define NPOSIXA 27 /* 0x1b complement of POSIXA, [[:^class:]] */ +#define CLUMP 28 /* 0x1c Match any extended grapheme cluster sequence */ +#define BRANCH 29 /* 0x1d Match this alternative, or the next... */ +#define EXACT 30 /* 0x1e Match this string (preceded by length). */ +#define EXACTL 31 /* 0x1f Like EXACT, but /l is in effect. */ +#define EXACTF 32 /* 0x20 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */ +#define EXACTFL 33 /* 0x21 Match this string (not guaranteed to be folded) using /il rules (w/len). */ +#define EXACTFU 34 /* 0x22 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */ +#define EXACTFA 35 /* 0x23 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */ +#define EXACTFU_SS 36 /* 0x24 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */ +#define EXACTFLU8 37 /* 0x25 Rare cirucmstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. */ +#define EXACTFA_NO_TRIE 38 /* 0x26 Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */ +#define NOTHING 39 /* 0x27 Match empty string. 
*/ +#define TAIL 40 /* 0x28 Match empty string. Can jump here from outside. */ +#define STAR 41 /* 0x29 Match this (simple) thing 0 or more times. */ +#define PLUS 42 /* 0x2a Match this (simple) thing 1 or more times. */ +#define CURLY 43 /* 0x2b Match this simple thing {n,m} times. */ +#define CURLYN 44 /* 0x2c Capture next-after-this simple thing */ +#define CURLYM 45 /* 0x2d Capture this medium-complex thing {n,m} times. */ +#define CURLYX 46 /* 0x2e Match this complex thing {n,m} times. */ +#define WHILEM 47 /* 0x2f Do curly processing and see if rest matches. */ +#define OPEN 48 /* 0x30 Mark this point in input as start of #n. */ +#define CLOSE 49 /* 0x31 Analogous to OPEN. */ +#define REF 50 /* 0x32 Match some already matched string */ +#define REFF 51 /* 0x33 Match already matched string, folded using native charset rules for non-utf8 */ +#define REFFL 52 /* 0x34 Match already matched string, folded in loc. */ +#define REFFU 53 /* 0x35 Match already matched string, folded using unicode rules for non-utf8 */ +#define REFFA 54 /* 0x36 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */ +#define NREF 55 /* 0x37 Match some already matched string */ +#define NREFF 56 /* 0x38 Match already matched string, folded using native charset rules for non-utf8 */ +#define NREFFL 57 /* 0x39 Match already matched string, folded in loc. */ +#define NREFFU 58 /* 0x3a Match already matched string, folded using unicode rules for non-utf8 */ +#define NREFFA 59 /* 0x3b Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */ +#define LONGJMP 60 /* 0x3c Jump far away. */ +#define BRANCHJ 61 /* 0x3d BRANCH with long offset. */ +#define IFMATCH 62 /* 0x3e Succeeds if the following matches. */ +#define UNLESSM 63 /* 0x3f Fails if the following matches. */ +#define SUSPEND 64 /* 0x40 "Independent" sub-RE. */ +#define IFTHEN 65 /* 0x41 Switch, should be preceded by switcher. 
*/ +#define GROUPP 66 /* 0x42 Whether the group matched. */ +#define EVAL 67 /* 0x43 Execute some Perl code. */ +#define MINMOD 68 /* 0x44 Next operator is not greedy. */ +#define LOGICAL 69 /* 0x45 Next opcode should set the flag only. */ +#define RENUM 70 /* 0x46 Group with independently numbered parens. */ +#define TRIE 71 /* 0x47 Match many EXACT(F[ALU]?)? at once. flags==type */ +#define TRIEC 72 /* 0x48 Same as TRIE, but with embedded charclass data */ +#define AHOCORASICK 73 /* 0x49 Aho Corasick stclass. flags==type */ +#define AHOCORASICKC 74 /* 0x4a Same as AHOCORASICK, but with embedded charclass data */ +#define GOSUB 75 /* 0x4b recurse to paren arg1 at (signed) ofs arg2 */ +#define GOSTART 76 /* 0x4c recurse to start of pattern */ +#define NGROUPP 77 /* 0x4d Whether the group matched. */ +#define INSUBP 78 /* 0x4e Whether we are in a specific recurse. */ +#define DEFINEP 79 /* 0x4f Never execute directly. */ +#define ENDLIKE 80 /* 0x50 Used only for the type field of verbs */ +#define OPFAIL 81 /* 0x51 Same as (?!) */ +#define ACCEPT 82 /* 0x52 Accepts the current matched string. */ +#define VERB 83 /* 0x53 Used only for the type field of verbs */ +#define PRUNE 84 /* 0x54 Pattern fails at this startpoint if no-backtracking through this */ +#define MARKPOINT 85 /* 0x55 Push the current location for rollback by cut. */ +#define SKIP 86 /* 0x56 On failure skip forward (to the mark) before retrying */ +#define COMMIT 87 /* 0x57 Pattern fails outright if backtracking through this */ +#define CUTGROUP 88 /* 0x58 On failure go to the next alternation in the group */ +#define KEEPS 89 /* 0x59 $& begins here. */ +#define LNBREAK 90 /* 0x5a generic newline pattern */ +#define OPTIMIZED 91 /* 0x5b Placeholder for dump. */ +#define PSEUDO 92 /* 0x5c Pseudo opcode for internal use. 
*/ /* ------------ States ------------- */ #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ @@ -171,7 +170,6 @@ EXTCONST U8 PL_regkind[] = { NBOUND, /* NBOUNDA */ REG_ANY, /* REG_ANY */ REG_ANY, /* SANY */ - REG_ANY, /* CANY */ ANYOF, /* ANYOF */ ANYOF, /* ANYOFL */ POSIXD, /* POSIXD */ @@ -313,7 +311,6 @@ static const U8 regarglen[] = { 0, /* NBOUNDA */ 0, /* REG_ANY */ 0, /* SANY */ - 0, /* CANY */ EXTRA_SIZE(struct regnode_1), /* ANYOF */ EXTRA_SIZE(struct regnode_1), /* ANYOFL */ 0, /* POSIXD */ @@ -412,7 +409,6 @@ static const char reg_off_by_arg[] = { 0, /* NBOUNDA */ 0, /* REG_ANY */ 0, /* SANY */ - 0, /* CANY */ 0, /* ANYOF */ 0, /* ANYOFL */ 0, /* POSIXD */ @@ -516,82 +512,81 @@ EXTCONST char * const PL_reg_name[] = { "NBOUNDA", /* 0x0f */ "REG_ANY", /* 0x10 */ "SANY", /* 0x11 */ - "CANY", /* 0x12 */ - "ANYOF", /* 0x13 */ - "ANYOFL", /* 0x14 */ - "POSIXD", /* 0x15 */ - "POSIXL", /* 0x16 */ - "POSIXU", /* 0x17 */ - "POSIXA", /* 0x18 */ - "NPOSIXD", /* 0x19 */ - "NPOSIXL", /* 0x1a */ - "NPOSIXU", /* 0x1b */ - "NPOSIXA", /* 0x1c */ - "CLUMP", /* 0x1d */ - "BRANCH", /* 0x1e */ - "EXACT", /* 0x1f */ - "EXACTL", /* 0x20 */ - "EXACTF", /* 0x21 */ - "EXACTFL", /* 0x22 */ - "EXACTFU", /* 0x23 */ - "EXACTFA", /* 0x24 */ - "EXACTFU_SS", /* 0x25 */ - "EXACTFLU8", /* 0x26 */ - "EXACTFA_NO_TRIE", /* 0x27 */ - "NOTHING", /* 0x28 */ - "TAIL", /* 0x29 */ - "STAR", /* 0x2a */ - "PLUS", /* 0x2b */ - "CURLY", /* 0x2c */ - "CURLYN", /* 0x2d */ - "CURLYM", /* 0x2e */ - "CURLYX", /* 0x2f */ - "WHILEM", /* 0x30 */ - "OPEN", /* 0x31 */ - "CLOSE", /* 0x32 */ - "REF", /* 0x33 */ - "REFF", /* 0x34 */ - "REFFL", /* 0x35 */ - "REFFU", /* 0x36 */ - "REFFA", /* 0x37 */ - "NREF", /* 0x38 */ - "NREFF", /* 0x39 */ - "NREFFL", /* 0x3a */ - "NREFFU", /* 0x3b */ - "NREFFA", /* 0x3c */ - "LONGJMP", /* 0x3d */ - "BRANCHJ", /* 0x3e */ - "IFMATCH", /* 0x3f */ - "UNLESSM", /* 0x40 */ - "SUSPEND", /* 0x41 */ - "IFTHEN", /* 0x42 */ 
- "GROUPP", /* 0x43 */ - "EVAL", /* 0x44 */ - "MINMOD", /* 0x45 */ - "LOGICAL", /* 0x46 */ - "RENUM", /* 0x47 */ - "TRIE", /* 0x48 */ - "TRIEC", /* 0x49 */ - "AHOCORASICK", /* 0x4a */ - "AHOCORASICKC", /* 0x4b */ - "GOSUB", /* 0x4c */ - "GOSTART", /* 0x4d */ - "NGROUPP", /* 0x4e */ - "INSUBP", /* 0x4f */ - "DEFINEP", /* 0x50 */ - "ENDLIKE", /* 0x51 */ - "OPFAIL", /* 0x52 */ - "ACCEPT", /* 0x53 */ - "VERB", /* 0x54 */ - "PRUNE", /* 0x55 */ - "MARKPOINT", /* 0x56 */ - "SKIP", /* 0x57 */ - "COMMIT", /* 0x58 */ - "CUTGROUP", /* 0x59 */ - "KEEPS", /* 0x5a */ - "LNBREAK", /* 0x5b */ - "OPTIMIZED", /* 0x5c */ - "PSEUDO", /* 0x5d */ + "ANYOF", /* 0x12 */ + "ANYOFL", /* 0x13 */ + "POSIXD", /* 0x14 */ + "POSIXL", /* 0x15 */ + "POSIXU", /* 0x16 */ + "POSIXA", /* 0x17 */ + "NPOSIXD", /* 0x18 */ + "NPOSIXL", /* 0x19 */ + "NPOSIXU", /* 0x1a */ + "NPOSIXA", /* 0x1b */ + "CLUMP", /* 0x1c */ + "BRANCH", /* 0x1d */ + "EXACT", /* 0x1e */ + "EXACTL", /* 0x1f */ + "EXACTF", /* 0x20 */ + "EXACTFL", /* 0x21 */ + "EXACTFU", /* 0x22 */ + "EXACTFA", /* 0x23 */ + "EXACTFU_SS", /* 0x24 */ + "EXACTFLU8", /* 0x25 */ + "EXACTFA_NO_TRIE", /* 0x26 */ + "NOTHING", /* 0x27 */ + "TAIL", /* 0x28 */ + "STAR", /* 0x29 */ + "PLUS", /* 0x2a */ + "CURLY", /* 0x2b */ + "CURLYN", /* 0x2c */ + "CURLYM", /* 0x2d */ + "CURLYX", /* 0x2e */ + "WHILEM", /* 0x2f */ + "OPEN", /* 0x30 */ + "CLOSE", /* 0x31 */ + "REF", /* 0x32 */ + "REFF", /* 0x33 */ + "REFFL", /* 0x34 */ + "REFFU", /* 0x35 */ + "REFFA", /* 0x36 */ + "NREF", /* 0x37 */ + "NREFF", /* 0x38 */ + "NREFFL", /* 0x39 */ + "NREFFU", /* 0x3a */ + "NREFFA", /* 0x3b */ + "LONGJMP", /* 0x3c */ + "BRANCHJ", /* 0x3d */ + "IFMATCH", /* 0x3e */ + "UNLESSM", /* 0x3f */ + "SUSPEND", /* 0x40 */ + "IFTHEN", /* 0x41 */ + "GROUPP", /* 0x42 */ + "EVAL", /* 0x43 */ + "MINMOD", /* 0x44 */ + "LOGICAL", /* 0x45 */ + "RENUM", /* 0x46 */ + "TRIE", /* 0x47 */ + "TRIEC", /* 0x48 */ + "AHOCORASICK", /* 0x49 */ + "AHOCORASICKC", /* 0x4a */ + "GOSUB", /* 0x4b */ + "GOSTART", /* 0x4c 
*/ + "NGROUPP", /* 0x4d */ + "INSUBP", /* 0x4e */ + "DEFINEP", /* 0x4f */ + "ENDLIKE", /* 0x50 */ + "OPFAIL", /* 0x51 */ + "ACCEPT", /* 0x52 */ + "VERB", /* 0x53 */ + "PRUNE", /* 0x54 */ + "MARKPOINT", /* 0x55 */ + "SKIP", /* 0x56 */ + "COMMIT", /* 0x57 */ + "CUTGROUP", /* 0x58 */ + "KEEPS", /* 0x59 */ + "LNBREAK", /* 0x5a */ + "OPTIMIZED", /* 0x5b */ + "PSEUDO", /* 0x5c */ /* ------------ States ------------- */ "TRIE_next", /* REGNODE_MAX +0x01 */ "TRIE_next_fail", /* REGNODE_MAX +0x02 */ @@ -695,7 +690,6 @@ EXTCONST char * const PL_reg_intflags_name[] = { "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */ "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */ "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */ - "CANY_SEEN", /* 0x00000080 - PREGf_CANY_SEEN */ "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */ "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */ "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */ @@ -705,7 +699,7 @@ EXTCONST char * const PL_reg_intflags_name[] = { #endif /* DOINIT */ #ifdef DEBUGGING -# define REG_INTFLAGS_NAME_SIZE 13 +# define REG_INTFLAGS_NAME_SIZE 12 #endif /* The following have no fixed length. U8 so we can do strchr() on it. 
*/ @@ -726,7 +720,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = { EXTCONST U8 PL_varies_bitmask[]; #else EXTCONST U8 PL_varies_bitmask[] = { - 0x00, 0x00, 0x00, 0x60, 0x00, 0xFC, 0xF9, 0x5F, 0x06, 0x00, 0x00, 0x00 + 0x00, 0x00, 0x00, 0x30, 0x00, 0xFE, 0xFC, 0x2F, 0x03, 0x00, 0x00, 0x00 }; #endif /* DOINIT */ @@ -738,8 +732,8 @@ EXTCONST U8 PL_varies_bitmask[] = { EXTCONST U8 PL_simple[] __attribute__deprecated__; #else EXTCONST U8 PL_simple[] __attribute__deprecated__ = { - REG_ANY, SANY, CANY, ANYOF, ANYOFL, POSIXD, POSIXL, POSIXU, POSIXA, - NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, + REG_ANY, SANY, ANYOF, ANYOFL, POSIXD, POSIXL, POSIXU, POSIXA, NPOSIXD, + NPOSIXL, NPOSIXU, NPOSIXA, 0 }; #endif /* DOINIT */ @@ -748,7 +742,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = { EXTCONST U8 PL_simple_bitmask[]; #else EXTCONST U8 PL_simple_bitmask[] = { - 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + 0x00, 0x00, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; #endif /* DOINIT */ diff --git a/t/lib/Cname.pm b/t/lib/Cname.pm index 4a1bc16d85..dad356ae66 100644 --- a/t/lib/Cname.pm +++ b/t/lib/Cname.pm @@ -24,16 +24,6 @@ sub translator { if ( $str eq 'TOO-LONG-STR') { return 'A' x 256; } - if ($str eq 'MALFORMED') { - $str = "\xDF\xDFabc"; - utf8::upgrade($str); - - no warnings 'deprecated'; - - # Create a malformed in first and second characters. - $str =~ s/^\C/A/; - $str =~ s/^(\C\C)\C/$1A/; - } return $str; } diff --git a/t/op/bop.t b/t/op/bop.t index 09f2be9ab9..8acd3b2afd 100644 --- a/t/op/bop.t +++ b/t/op/bop.t @@ -15,7 +15,7 @@ BEGIN { # If you find tests are failing, please try adding names to tests to track # down where the failure is, and supply your new names as a patch. # (Just-in-time test naming) -plan tests => 194 + (10*13*2) + 5; +plan tests => 192 + (10*13*2) + 5; # numerics ok ((0xdead & 0xbeef) == 0x9ead); @@ -430,40 +430,6 @@ SKIP: { is($b, chr(0x1FE) x 0x0FF . 
chr(0x101) x 2); } -# update to pp_complement() via Coverity -SKIP: { - # UTF-EBCDIC is limited to 0x7fffffff and can't encode ~0. - skip "Complements exceed maximum representable on EBCDIC ", 2 if $::IS_EBCDIC; - - my $str = "\x{10000}\x{800}"; - # U+10000 is four bytes in UTF-8/UTF-EBCDIC. - # U+0800 is three bytes in UTF-8/UTF-EBCDIC. - - no warnings "utf8"; - { - use bytes; - no warnings 'deprecated'; - $str =~ s/\C\C\z//; - } - - # it's really bogus that (~~malformed) is \0. - my $ref = "\x{10000}\0"; - is(~~$str, $ref); - - # same test, but this time with a longer replacement string that - # exercises a different branch in pp_subsr() - - $str = "\x{10000}\x{800}"; - { - use bytes; - no warnings 'deprecated'; - $str =~ s/\C\C\z/\0\0\0/; - } - - # it's also bogus that (~~malformed) is \0\0\0\0. - my $ref = "\x{10000}\0\0\0\0"; - is(~~$str, $ref, "use bytes with long replacement"); -} # New string- and number-specific bitwise ops { diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 891bb66061..230fd891a6 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -28,57 +28,6 @@ run_tests() unless caller; sub run_tests { { - no warnings 'deprecated'; - - my $message = '\C matches octet'; - $_ = "a\x{100}b"; - ok(/(.)(\C)(\C)(.)/, $message); - is($1, "a", $message); - if ($::IS_ASCII) { # ASCII (or equivalent), should be UTF-8 - is($2, "\xC4", $message); - is($3, "\x80", $message); - } - elsif ($::IS_EBCDIC) { # EBCDIC (or equivalent), should be UTF-EBCDIC - is($2, "\x8C", $message); - is($3, "\x41", $message); - } - else { - SKIP: { - ok 0, "Unexpected platform", "ord ('A') =" . ord 'A'; - skip "Unexpected platform"; - } - } - is($4, "b", $message); - } - - { - no warnings 'deprecated'; - - my $message = '\C matches octet'; - $_ = "\x{100}"; - ok(/(\C)/g, $message); - if ($::IS_ASCII) { - is($1, "\xC4", $message); - } - elsif ($::IS_EBCDIC) { - is($1, "\x8C", $message); - } - else { - ok 0, "Unexpected platform", "ord ('A') = " . 
ord 'A'; - } - ok(/(\C)/g, $message); - if ($::IS_ASCII) { - is($1, "\x80", $message); - } - elsif ($::IS_EBCDIC) { - is($1, "\x41", $message); - } - else { - ok 0, "Unexpected platform", "ord ('A') = " . ord 'A'; - } - } - - { # Japhy -- added 03/03/2001 () = (my $str = "abc") =~ /(...)/; $str = "def"; @@ -284,24 +233,6 @@ sub run_tests { } { - no warnings 'deprecated'; - - my $message = '. matches \n with /s'; - my $str1 = "foo\nbar"; - my $str2 = "foo\n\x{100}bar"; - my ($a, $b) = map {chr} $::IS_ASCII ? (0xc4, 0x80) : (0x8c, 0x41); - my @a; - @a = $str1 =~ /./g; is(@a, 6, $message); is("@a", "f o o b a r", $message); - @a = $str1 =~ /./gs; is(@a, 7, $message); is("@a", "f o o \n b a r", $message); - @a = $str1 =~ /\C/g; is(@a, 7, $message); is("@a", "f o o \n b a r", $message); - @a = $str1 =~ /\C/gs; is(@a, 7, $message); is("@a", "f o o \n b a r", $message); - @a = $str2 =~ /./g; is(@a, 7, $message); is("@a", "f o o \x{100} b a r", $message); - @a = $str2 =~ /./gs; is(@a, 8, $message); is("@a", "f o o \n \x{100} b a r", $message); - @a = $str2 =~ /\C/g; is(@a, 9, $message); is("@a", "f o o \n $a $b b a r", $message); - @a = $str2 =~ /\C/gs; is(@a, 9, $message); is("@a", "f o o \n $a $b b a r", $message); - } - - { no warnings 'digit'; # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. 
my $x; @@ -492,11 +423,6 @@ sub run_tests { =~ /^(\X)!/ && $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}", $message); - no warnings 'deprecated'; - - $message = '\C and \X'; - like("!abc!", qr/a\Cc/, $message); - like("!abc!", qr/a\Xc/, $message); } { @@ -552,13 +478,6 @@ sub run_tests { $& eq "Francais", $message); ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran.ais/ && $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", $message); - { - no warnings 'deprecated'; - ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Cais/ && - $& eq "Francais", $message); - # COMBINING CEDILLA is two bytes when encoded - like("Franc\N{COMBINING CEDILLA}ais", qr/Franc\C\Cais/, $message); - } ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Xais/ && $& eq "Francais", $message); ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran\Xais/ && @@ -1114,8 +1033,6 @@ sub run_tests { # differently undef $w; eval q [ok "\N{TOO-LONG-STR}" =~ /^\N{TOO-LONG-STR}$/, 'Verify that what once was too long a string works']; - eval 'q(syntax error) =~ /\N{MALFORMED}/'; - ok $@ && $@ =~ /Malformed/, 'Verify that malformed utf8 gives an error'; eval 'q() =~ /\N{4F}/'; ok $@ && $@ =~ /Invalid character/, 'Verify that leading digit in name gives error'; eval 'q() =~ /\N{COM,MA}/'; diff --git a/t/re/pat_rt_report.t b/t/re/pat_rt_report.t index ed8fafcc78..f35e72c35f 100644 --- a/t/re/pat_rt_report.t +++ b/t/re/pat_rt_report.t @@ -20,7 +20,7 @@ use warnings; use 5.010; use Config; -plan tests => 2532; # Update this when adding/deleting tests. +plan tests => 2500; # Update this when adding/deleting tests. run_tests() unless caller; @@ -89,13 +89,6 @@ sub run_tests { } { - no warnings 'deprecated'; - my $message = '\C and É; Bug 20001230.002'; - ok("École" =~ /^\C\C(.)/ && $1 eq 'c', $message); - like("École", qr/^\C\C(c)/, $message); - } - - { # The original bug report had 'no utf8' here but that was irrelevant. 
my $message = "Don't dump core; Bug 20010306.008"; @@ -233,59 +226,6 @@ sub run_tests { } { - our $a = "x\x{100}"; - chop $a; # Leaves the UTF-8 flag - $a .= "y"; # 1 byte before 'y'. - - no warnings 'deprecated'; - - like($a, qr/^\C/, 'match one \C on 1-byte UTF-8; Bug 15763'); - like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); - - like($a, qr/^\Cy/, 'match \Cy; Bug 15763'); - like($a, qr/^\C{1}y/, 'match \C{1}y; Bug 15763'); - - unlike($a, qr/^\C\Cy/, q {don't match two \Cy; Bug 15763}); - unlike($a, qr/^\C{2}y/, q {don't match \C{2}y; Bug 15763}); - - $a = "\x{100}y"; # 2 bytes before "y" - - like($a, qr/^\C/, 'match one \C on 2-byte UTF-8; Bug 15763'); - like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); - like($a, qr/^\C\C/, 'match two \C; Bug 15763'); - like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763'); - - like($a, qr/^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte; Bug 15763'); - like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763'); - - like($a, qr/^\C\Cy/, 'match two \C; Bug 15763'); - like($a, qr/^\C{2}y/, 'match \C{2}; Bug 15763'); - - unlike($a, qr/^\C\C\Cy/, q {don't match three \Cy; Bug 15763}); - unlike($a, qr/^\C{2}\Cy/, q {don't match \C{2}\Cy; Bug 15763}); - unlike($a, qr/^\C{3}y/, q {don't match \C{3}y; Bug 15763}); - - $a = "\x{1000}y"; # 3 bytes before "y" - - like($a, qr/^\C/, 'match one \C on three-byte UTF-8; Bug 15763'); - like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); - like($a, qr/^\C\C/, 'match two \C; Bug 15763'); - like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763'); - like($a, qr/^\C\C\C/, 'match three \C; Bug 15763'); - like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763'); - - like($a, qr/^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte; Bug 15763'); - like($a, qr/^\C{4}/, 'match \C{4}; Bug 15763'); - - like($a, qr/^\C\C\Cy/, 'match three \Cy; Bug 15763'); - like($a, qr/^\C{3}y/, 'match \C{3}y; Bug 15763'); - - unlike($a, qr/^\C\C\C\Cy/, q {don't match four \Cy; Bug 15763}); - unlike($a, qr/^\C{4}y/, q {don't match \C{4}y; Bug 15763}); - } - 
- - { my $message = 'UTF-8 matching; Bug 15397'; like("\x{100}", qr/\x{100}/, $message); like("\x{100}", qr/(\x{100})/, $message); @@ -1173,13 +1113,6 @@ EOP # in the report above that only happened in a thread. my $s = "\x{1ff}" . "f" x 32; ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap"); - - # this one segfaulted under the conditions above - # of course, CANY is evil, maybe it should crash - { - no warnings 'deprecated'; - ok($s =~ /.\C+/, "CANY pointer wrap"); - } } } # End of sub run_tests |