summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2015-06-19 12:47:05 +0100
committerDavid Mitchell <davem@iabyn.com>2015-06-19 12:47:05 +0100
commit33c28ab263ac8bba71954d61ec55d7f1dc6c0eca (patch)
tree96f97216db61bab1ff879fb662e18d1c64db471d
parent9558026484c47d197ababb92c9e5477b379f7c42 (diff)
downloadperl-33c28ab263ac8bba71954d61ec55d7f1dc6c0eca.tar.gz
remove deprecated /\C/ RE character class
This horrible thing broke encapsulation and was as buggy as a very buggy thing. It's been officially deprecated since 5.20.0 and now it can finally die die die!!!!
-rw-r--r--dump.c1
-rw-r--r--ext/XS-APItest/t/callregexec.t6
-rw-r--r--pod/perldebguts.pod1
-rw-r--r--pod/perldelta.pod8
-rw-r--r--pod/perldiag.pod17
-rw-r--r--pod/perlre.pod5
-rw-r--r--pod/perlrebackslash.pod14
-rw-r--r--pod/perlreref.pod2
-rw-r--r--pod/perlretut.pod4
-rw-r--r--regcomp.c21
-rw-r--r--regcomp.h3
-rw-r--r--regcomp.sym1
-rw-r--r--regexec.c47
-rw-r--r--regnodes.h320
-rw-r--r--t/lib/Cname.pm10
-rw-r--r--t/op/bop.t36
-rw-r--r--t/re/pat_advanced.t83
-rw-r--r--t/re/pat_rt_report.t69
18 files changed, 185 insertions, 463 deletions
diff --git a/dump.c b/dump.c
index 802dddf836..bd9d12c608 100644
--- a/dump.c
+++ b/dump.c
@@ -1415,7 +1415,6 @@ const struct flag_to_name regexp_core_intflags_names[] = {
{PREGf_CUTGROUP_SEEN, "CUTGROUP_SEEN,"},
{PREGf_USE_RE_EVAL, "USE_RE_EVAL,"},
{PREGf_NOSCAN, "NOSCAN,"},
- {PREGf_CANY_SEEN, "CANY_SEEN,"},
{PREGf_GPOS_SEEN, "GPOS_SEEN,"},
{PREGf_GPOS_FLOAT, "GPOS_FLOAT,"},
{PREGf_ANCH_MBOL, "ANCH_MBOL,"},
diff --git a/ext/XS-APItest/t/callregexec.t b/ext/XS-APItest/t/callregexec.t
index 74e1e206df..22446b66f5 100644
--- a/ext/XS-APItest/t/callregexec.t
+++ b/ext/XS-APItest/t/callregexec.t
@@ -10,7 +10,7 @@ use strict;
use XS::APItest;
*callregexec = *XS::APItest::callregexec;
-use Test::More tests => 50;
+use Test::More tests => 48;
# Test that the regex engine can handle strings without terminating \0
# XXX This is by no means comprehensive; it doesn't test all ops, nor all
@@ -42,10 +42,6 @@ sub try {
try "ax", qr/a$/m, 1, 'MEOL';
try "ax", qr/a$/s, 1, 'SEOL';
try "abx", qr/^(ab|X)./s, 0, 'SANY';
- {
- no warnings 'deprecated';
- try "abx", qr/^(ab|X)\C/, 0, 'CANY';
- }
try "abx", qr/^(ab|X)./, 0, 'REG_ANY';
try "abx", qr/^ab(c|d|e|x)/, 0, 'TRIE/TRIEC';
try "abx", qr/^abx/, 0, 'EXACT';
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 2b5561d2d7..064af64b55 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -592,7 +592,6 @@ will be lost.
# [Special] alternatives:
REG_ANY no Match any one character (except newline).
SANY no Match any one character.
- CANY no Match any one byte.
ANYOF sv 1 Match character in (or not in) this class,
single char match only
ANYOFL sv 1 Like ANYOF, but /l is in effect
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 2cb9d723cd..f7cd398586 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -45,7 +45,13 @@ XXX For a release on a stable branch, this section aspires to be:
If any exist, they are bugs, and we request that you submit a
report. See L</Reporting Bugs> below.
-[ List each incompatible change as a =head2 entry ]
+=head2 The C</\C/> character class has been removed.
+
+This regular expression character class was deprecated in v5.20.0 and has
+produced a deprecation warning since v5.22.0. It is now a compile-time
+error. If you need to examine the individual bytes that make up a
+UTF-8-encoded character, then use C<utf8::encode()> on the string (or a
+copy) first.
=head1 Deprecations
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index ab94d597b7..1d53e5df92 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1559,15 +1559,14 @@ defined in the C<:alias> import argument to C<use charnames>, but they
could be defined by a translator installed into C<$^H{charnames}>.
See L<charnames/CUSTOM ALIASES>.
-=item \C is deprecated in regex; marked by S<<-- HERE> in m/%s/
-
-(D deprecated, regexp) The \C character class is deprecated, and will
-become a compile-time error in a future release of perl (tentatively
-v5.24). This construct allows you to match a single byte of what makes
-up a multi-byte single UTF8 character, and breaks encapsulation. It is
-currently also very buggy. If you really need to process the individual
-bytes, you probably want to convert your string to one where each
-underlying byte is stored as a character, with utf8::encode().
+=item \C no longer supported in regex; marked by S<<-- HERE> in m/%s/
+
+(F) The \C character class used to allow a match of a single byte within a
+multi-byte UTF-8 character, but was removed in v5.24 as it broke
+encapsulation and its implementation was extremely buggy. If you really
+need to process the individual bytes, you probably want to convert your
+string to one where each underlying byte is stored as a character, with
+utf8::encode().
=item "\c%c" is more clearly written simply as "%s"
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 2cf00fbd6d..a262b4cfc9 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -706,11 +706,6 @@ X<\g> X<\k> X<\K> X<backreference>
\pP [3] Match P, named property. Use \p{Prop} for longer names
\PP [3] Match non-P
\X [4] Match Unicode "eXtended grapheme cluster"
- \C Match a single C-language char (octet) even if that is
- part of a larger UTF-8 character. Thus it breaks up
- characters into their UTF-8 bytes, so you may end up
- with malformed pieces of UTF-8. Unsupported in
- lookbehind. (Deprecated.)
\1 [5] Backreference to a specific capture group or buffer.
'1' may actually be any positive integer.
\g1 [5] Backreference to a specific or previous group,
diff --git a/pod/perlrebackslash.pod b/pod/perlrebackslash.pod
index c216f255e0..08b9abc431 100644
--- a/pod/perlrebackslash.pod
+++ b/pod/perlrebackslash.pod
@@ -69,8 +69,6 @@ as C<Not in [].>
\b{}, \b Boundary. (\b is a backspace in []).
\B{}, \B Not a boundary. Not in [].
\cX Control-X.
- \C Single octet, even under UTF-8. Not in [].
- (Deprecated)
\d Character class for digits.
\D Character class for non-digits.
\e Escape character.
@@ -663,18 +661,6 @@ categories above. These are:
=over 4
-=item \C
-
-(Deprecated.) C<\C> always matches a single octet, even if the source
-string is encoded
-in UTF-8 format, and the character to be matched is a multi-octet character.
-This is very dangerous, because it violates
-the logical character abstraction and can cause UTF-8 sequences to become malformed.
-
-Use C<utf8::encode()> instead.
-
-Mnemonic: oI<C>tet.
-
=item \K
This appeared in perl 5.10.0. Anything matched left of C<\K> is
diff --git a/pod/perlreref.pod b/pod/perlreref.pod
index 848185e3a5..e9b784e445 100644
--- a/pod/perlreref.pod
+++ b/pod/perlreref.pod
@@ -144,8 +144,6 @@ and L<perlunicode> for details.
\V A non vertical whitespace
\R A generic newline (?>\v|\x0D\x0A)
- \C Match a byte (with Unicode, '.' matches a character)
- (Deprecated.)
\pP Match P-named (Unicode) property
\p{...} Match Unicode property with name longer than 1 character
\PP Match non-P
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index cb399ab631..9a3c696e0f 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -2295,10 +2295,6 @@ They evaluate true if the regexps do I<not> match:
$x =~ /foo(?!baz)/; # matches, 'baz' doesn't follow 'foo'
$x =~ /(?<!\s)foo/; # matches, there is no \s before 'foo'
-The C<\C> is unsupported in lookbehind, because the already
-treacherous definition of C<\C> would become even more so
-when going backwards.
-
Here is an example where a string containing blank-separated words,
numbers and single dashes is to be split into its components.
Using C</\s+/> alone won't work, because spaces are not required between
diff --git a/regcomp.c b/regcomp.c
index 712c8ed7d9..4a37b6ad98 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -808,9 +808,6 @@ static const scan_data_t zero_scan_data =
if (RExC_seen & REG_GPOS_SEEN) \
PerlIO_printf(Perl_debug_log,"REG_GPOS_SEEN "); \
\
- if (RExC_seen & REG_CANY_SEEN) \
- PerlIO_printf(Perl_debug_log,"REG_CANY_SEEN "); \
- \
if (RExC_seen & REG_RECURSE_SEEN) \
PerlIO_printf(Perl_debug_log,"REG_RECURSE_SEEN "); \
\
@@ -5069,7 +5066,6 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d",
OP(scan));
#endif
- case CANY:
case SANY:
if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
ssc_match_all_cp(data->start_class);
@@ -7288,8 +7284,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
lookbehind */
if (pRExC_state->num_code_blocks)
r->extflags |= RXf_EVAL_SEEN;
- if (RExC_seen & REG_CANY_SEEN)
- r->intflags |= PREGf_CANY_SEEN;
if (RExC_seen & REG_VERBARG_SEEN)
{
r->intflags |= PREGf_VERBARG_SEEN;
@@ -7701,13 +7695,8 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
sv_setpvn(sv, s, i);
TAINT_set(oldtainted);
#endif
- if ( (rx->intflags & PREGf_CANY_SEEN)
- ? (RXp_MATCH_UTF8(rx)
- && (!i || is_utf8_string((U8*)s, i)))
- : (RXp_MATCH_UTF8(rx)) )
- {
+ if (RXp_MATCH_UTF8(rx))
SvUTF8_on(sv);
- }
else
SvUTF8_off(sv);
if (TAINTING_get) {
@@ -11807,13 +11796,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
case 'C':
- ret = reg_node(pRExC_state, CANY);
- RExC_seen |= REG_CANY_SEEN;
- *flagp |= HASWIDTH|SIMPLE;
- if (PASS2) {
- ckWARNdep(RExC_parse+1, "\\C is deprecated");
- }
- goto finish_meta_pat;
+ vFAIL("\\C no longer supported");
case 'X':
ret = reg_node(pRExC_state, CLUMP);
*flagp |= HASWIDTH;
diff --git a/regcomp.h b/regcomp.h
index f418086c9f..897d35b7d8 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -134,7 +134,7 @@
#define PREGf_USE_RE_EVAL 0x00000020 /* compiled with "use re 'eval'" */
/* these used to be extflags, but are now intflags */
#define PREGf_NOSCAN 0x00000040
-#define PREGf_CANY_SEEN 0x00000080
+ /* spare */
#define PREGf_GPOS_SEEN 0x00000100
#define PREGf_GPOS_FLOAT 0x00000200
@@ -597,7 +597,6 @@ struct regnode_ssc {
#define REG_LOOKBEHIND_SEEN 0x00000002
#define REG_GPOS_SEEN 0x00000004
/* spare */
-#define REG_CANY_SEEN 0x00000010
#define REG_RECURSE_SEEN 0x00000020
#define REG_TOP_LEVEL_BRANCHES_SEEN 0x00000040
#define REG_VERBARG_SEEN 0x00000080
diff --git a/regcomp.sym b/regcomp.sym
index f79b87485c..ffcb53b21c 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -56,7 +56,6 @@ NBOUNDA NBOUND, no ; Match "" betweeen any \w\w or \W\W, where \w
#* [Special] alternatives:
REG_ANY REG_ANY, no 0 S ; Match any one character (except newline).
SANY REG_ANY, no 0 S ; Match any one character.
-CANY REG_ANY, no 0 S ; Match any one byte.
ANYOF ANYOF, sv 1 S ; Match character in (or not in) this class, single char match only
ANYOFL ANYOF, sv 1 S ; Like ANYOF, but /l is in effect
diff --git a/regexec.c b/regexec.c
index 31fdcb24e0..eaa60bd302 100644
--- a/regexec.c
+++ b/regexec.c
@@ -770,9 +770,7 @@ Perl_re_intuit_start(pTHX_
* caller will have set strpos=pos()-4; we look for the substr
* at position pos()-4+1, which lines up with the "a" */
- if (prog->check_offset_min == prog->check_offset_max
- && !(prog->intflags & PREGf_CANY_SEEN))
- {
+ if (prog->check_offset_min == prog->check_offset_max) {
/* Substring at constant offset from beg-of-str... */
SSize_t slen = SvCUR(check);
char *s = HOP3c(strpos, prog->check_offset_min, strend);
@@ -863,17 +861,10 @@ Perl_re_intuit_start(pTHX_
(IV)prog->check_end_shift);
});
- if (prog->intflags & PREGf_CANY_SEEN) {
- start_point= (U8*)(rx_origin + start_shift);
- end_point= (U8*)(strend - end_shift);
- if (start_point > end_point)
- goto fail_finish;
- } else {
- end_point = HOP3(strend, -end_shift, strbeg);
- start_point = HOPMAYBE3(rx_origin, start_shift, end_point);
- if (!start_point)
- goto fail_finish;
- }
+ end_point = HOP3(strend, -end_shift, strbeg);
+ start_point = HOPMAYBE3(rx_origin, start_shift, end_point);
+ if (!start_point)
+ goto fail_finish;
/* If the regex is absolutely anchored to either the start of the
@@ -1841,14 +1832,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
}
break;
- case CANY:
- REXEC_FBC_SCAN(
- if (tmp && (reginfo->intuit || regtry(reginfo, &s)))
- goto got_it;
- else
- tmp = doevery;
- );
- break;
case EXACTFA_NO_TRIE: /* This node only generated for non-utf8 patterns */
assert(! is_utf8_pat);
@@ -3266,7 +3249,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
if (minlen) {
const OPCODE op = OP(progi->regstclass);
/* don't bother with what can't match */
- if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
+ if (PL_regkind[op] != EXACT && PL_regkind[op] != TRIE)
strend = HOPc(strend, -(minlen - 1));
}
DEBUG_EXECUTE_r({
@@ -3822,7 +3805,7 @@ S_dump_exec_pos(pTHX_ const char *locinput,
if (pref0_len > pref_len)
pref0_len = pref_len;
{
- const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
+ const int is_uni = utf8_target ? 1 : 0;
RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
(locinput - pref_len),pref0_len, 60, 4, 5);
@@ -4986,12 +4969,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
sayNO;
goto increment_locinput;
- case CANY: /* \C */
- if (NEXTCHR_IS_EOS)
- sayNO;
- locinput++;
- break;
-
case REG_ANY: /* /./ */
if ((NEXTCHR_IS_EOS) || nextchr == '\n')
sayNO;
@@ -8105,16 +8082,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
else
scan = loceol;
break;
- case CANY: /* Move <scan> forward <max> bytes, unless goes off end */
- if (utf8_target && loceol - scan > max) {
-
- /* <loceol> hadn't been adjusted in the UTF-8 case */
- scan += max;
- }
- else {
- scan = loceol;
- }
- break;
case EXACTL:
_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
diff --git a/regnodes.h b/regnodes.h
index 3c9b991295..db32920c8c 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
/* Regops and State definitions */
-#define REGNODE_MAX 93
-#define REGMATCH_STATE_MAX 133
+#define REGNODE_MAX 92
+#define REGMATCH_STATE_MAX 132
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -29,82 +29,81 @@
#define NBOUNDA 15 /* 0x0f Match "" betweeen any \w\w or \W\W, where \w is [_a-zA-Z0-9] */
#define REG_ANY 16 /* 0x10 Match any one character (except newline). */
#define SANY 17 /* 0x11 Match any one character. */
-#define CANY 18 /* 0x12 Match any one byte. */
-#define ANYOF 19 /* 0x13 Match character in (or not in) this class, single char match only */
-#define ANYOFL 20 /* 0x14 Like ANYOF, but /l is in effect */
-#define POSIXD 21 /* 0x15 Some [[:class:]] under /d; the FLAGS field gives which one */
-#define POSIXL 22 /* 0x16 Some [[:class:]] under /l; the FLAGS field gives which one */
-#define POSIXU 23 /* 0x17 Some [[:class:]] under /u; the FLAGS field gives which one */
-#define POSIXA 24 /* 0x18 Some [[:class:]] under /a; the FLAGS field gives which one */
-#define NPOSIXD 25 /* 0x19 complement of POSIXD, [[:^class:]] */
-#define NPOSIXL 26 /* 0x1a complement of POSIXL, [[:^class:]] */
-#define NPOSIXU 27 /* 0x1b complement of POSIXU, [[:^class:]] */
-#define NPOSIXA 28 /* 0x1c complement of POSIXA, [[:^class:]] */
-#define CLUMP 29 /* 0x1d Match any extended grapheme cluster sequence */
-#define BRANCH 30 /* 0x1e Match this alternative, or the next... */
-#define EXACT 31 /* 0x1f Match this string (preceded by length). */
-#define EXACTL 32 /* 0x20 Like EXACT, but /l is in effect. */
-#define EXACTF 33 /* 0x21 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
-#define EXACTFL 34 /* 0x22 Match this string (not guaranteed to be folded) using /il rules (w/len). */
-#define EXACTFU 35 /* 0x23 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
-#define EXACTFA 36 /* 0x24 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
-#define EXACTFU_SS 37 /* 0x25 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
-#define EXACTFLU8 38 /* 0x26 Rare cirucmstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. */
-#define EXACTFA_NO_TRIE 39 /* 0x27 Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */
-#define NOTHING 40 /* 0x28 Match empty string. */
-#define TAIL 41 /* 0x29 Match empty string. Can jump here from outside. */
-#define STAR 42 /* 0x2a Match this (simple) thing 0 or more times. */
-#define PLUS 43 /* 0x2b Match this (simple) thing 1 or more times. */
-#define CURLY 44 /* 0x2c Match this simple thing {n,m} times. */
-#define CURLYN 45 /* 0x2d Capture next-after-this simple thing */
-#define CURLYM 46 /* 0x2e Capture this medium-complex thing {n,m} times. */
-#define CURLYX 47 /* 0x2f Match this complex thing {n,m} times. */
-#define WHILEM 48 /* 0x30 Do curly processing and see if rest matches. */
-#define OPEN 49 /* 0x31 Mark this point in input as start of #n. */
-#define CLOSE 50 /* 0x32 Analogous to OPEN. */
-#define REF 51 /* 0x33 Match some already matched string */
-#define REFF 52 /* 0x34 Match already matched string, folded using native charset rules for non-utf8 */
-#define REFFL 53 /* 0x35 Match already matched string, folded in loc. */
-#define REFFU 54 /* 0x36 Match already matched string, folded using unicode rules for non-utf8 */
-#define REFFA 55 /* 0x37 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define NREF 56 /* 0x38 Match some already matched string */
-#define NREFF 57 /* 0x39 Match already matched string, folded using native charset rules for non-utf8 */
-#define NREFFL 58 /* 0x3a Match already matched string, folded in loc. */
-#define NREFFU 59 /* 0x3b Match already matched string, folded using unicode rules for non-utf8 */
-#define NREFFA 60 /* 0x3c Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define LONGJMP 61 /* 0x3d Jump far away. */
-#define BRANCHJ 62 /* 0x3e BRANCH with long offset. */
-#define IFMATCH 63 /* 0x3f Succeeds if the following matches. */
-#define UNLESSM 64 /* 0x40 Fails if the following matches. */
-#define SUSPEND 65 /* 0x41 "Independent" sub-RE. */
-#define IFTHEN 66 /* 0x42 Switch, should be preceded by switcher. */
-#define GROUPP 67 /* 0x43 Whether the group matched. */
-#define EVAL 68 /* 0x44 Execute some Perl code. */
-#define MINMOD 69 /* 0x45 Next operator is not greedy. */
-#define LOGICAL 70 /* 0x46 Next opcode should set the flag only. */
-#define RENUM 71 /* 0x47 Group with independently numbered parens. */
-#define TRIE 72 /* 0x48 Match many EXACT(F[ALU]?)? at once. flags==type */
-#define TRIEC 73 /* 0x49 Same as TRIE, but with embedded charclass data */
-#define AHOCORASICK 74 /* 0x4a Aho Corasick stclass. flags==type */
-#define AHOCORASICKC 75 /* 0x4b Same as AHOCORASICK, but with embedded charclass data */
-#define GOSUB 76 /* 0x4c recurse to paren arg1 at (signed) ofs arg2 */
-#define GOSTART 77 /* 0x4d recurse to start of pattern */
-#define NGROUPP 78 /* 0x4e Whether the group matched. */
-#define INSUBP 79 /* 0x4f Whether we are in a specific recurse. */
-#define DEFINEP 80 /* 0x50 Never execute directly. */
-#define ENDLIKE 81 /* 0x51 Used only for the type field of verbs */
-#define OPFAIL 82 /* 0x52 Same as (?!) */
-#define ACCEPT 83 /* 0x53 Accepts the current matched string. */
-#define VERB 84 /* 0x54 Used only for the type field of verbs */
-#define PRUNE 85 /* 0x55 Pattern fails at this startpoint if no-backtracking through this */
-#define MARKPOINT 86 /* 0x56 Push the current location for rollback by cut. */
-#define SKIP 87 /* 0x57 On failure skip forward (to the mark) before retrying */
-#define COMMIT 88 /* 0x58 Pattern fails outright if backtracking through this */
-#define CUTGROUP 89 /* 0x59 On failure go to the next alternation in the group */
-#define KEEPS 90 /* 0x5a $& begins here. */
-#define LNBREAK 91 /* 0x5b generic newline pattern */
-#define OPTIMIZED 92 /* 0x5c Placeholder for dump. */
-#define PSEUDO 93 /* 0x5d Pseudo opcode for internal use. */
+#define ANYOF 18 /* 0x12 Match character in (or not in) this class, single char match only */
+#define ANYOFL 19 /* 0x13 Like ANYOF, but /l is in effect */
+#define POSIXD 20 /* 0x14 Some [[:class:]] under /d; the FLAGS field gives which one */
+#define POSIXL 21 /* 0x15 Some [[:class:]] under /l; the FLAGS field gives which one */
+#define POSIXU 22 /* 0x16 Some [[:class:]] under /u; the FLAGS field gives which one */
+#define POSIXA 23 /* 0x17 Some [[:class:]] under /a; the FLAGS field gives which one */
+#define NPOSIXD 24 /* 0x18 complement of POSIXD, [[:^class:]] */
+#define NPOSIXL 25 /* 0x19 complement of POSIXL, [[:^class:]] */
+#define NPOSIXU 26 /* 0x1a complement of POSIXU, [[:^class:]] */
+#define NPOSIXA 27 /* 0x1b complement of POSIXA, [[:^class:]] */
+#define CLUMP 28 /* 0x1c Match any extended grapheme cluster sequence */
+#define BRANCH 29 /* 0x1d Match this alternative, or the next... */
+#define EXACT 30 /* 0x1e Match this string (preceded by length). */
+#define EXACTL 31 /* 0x1f Like EXACT, but /l is in effect. */
+#define EXACTF 32 /* 0x20 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
+#define EXACTFL 33 /* 0x21 Match this string (not guaranteed to be folded) using /il rules (w/len). */
+#define EXACTFU 34 /* 0x22 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
+#define EXACTFA 35 /* 0x23 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
+#define EXACTFU_SS 36 /* 0x24 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
+#define EXACTFLU8 37 /* 0x25 Rare cirucmstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. */
+#define EXACTFA_NO_TRIE 38 /* 0x26 Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */
+#define NOTHING 39 /* 0x27 Match empty string. */
+#define TAIL 40 /* 0x28 Match empty string. Can jump here from outside. */
+#define STAR 41 /* 0x29 Match this (simple) thing 0 or more times. */
+#define PLUS 42 /* 0x2a Match this (simple) thing 1 or more times. */
+#define CURLY 43 /* 0x2b Match this simple thing {n,m} times. */
+#define CURLYN 44 /* 0x2c Capture next-after-this simple thing */
+#define CURLYM 45 /* 0x2d Capture this medium-complex thing {n,m} times. */
+#define CURLYX 46 /* 0x2e Match this complex thing {n,m} times. */
+#define WHILEM 47 /* 0x2f Do curly processing and see if rest matches. */
+#define OPEN 48 /* 0x30 Mark this point in input as start of #n. */
+#define CLOSE 49 /* 0x31 Analogous to OPEN. */
+#define REF 50 /* 0x32 Match some already matched string */
+#define REFF 51 /* 0x33 Match already matched string, folded using native charset rules for non-utf8 */
+#define REFFL 52 /* 0x34 Match already matched string, folded in loc. */
+#define REFFU 53 /* 0x35 Match already matched string, folded using unicode rules for non-utf8 */
+#define REFFA 54 /* 0x36 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define NREF 55 /* 0x37 Match some already matched string */
+#define NREFF 56 /* 0x38 Match already matched string, folded using native charset rules for non-utf8 */
+#define NREFFL 57 /* 0x39 Match already matched string, folded in loc. */
+#define NREFFU 58 /* 0x3a Match already matched string, folded using unicode rules for non-utf8 */
+#define NREFFA 59 /* 0x3b Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define LONGJMP 60 /* 0x3c Jump far away. */
+#define BRANCHJ 61 /* 0x3d BRANCH with long offset. */
+#define IFMATCH 62 /* 0x3e Succeeds if the following matches. */
+#define UNLESSM 63 /* 0x3f Fails if the following matches. */
+#define SUSPEND 64 /* 0x40 "Independent" sub-RE. */
+#define IFTHEN 65 /* 0x41 Switch, should be preceded by switcher. */
+#define GROUPP 66 /* 0x42 Whether the group matched. */
+#define EVAL 67 /* 0x43 Execute some Perl code. */
+#define MINMOD 68 /* 0x44 Next operator is not greedy. */
+#define LOGICAL 69 /* 0x45 Next opcode should set the flag only. */
+#define RENUM 70 /* 0x46 Group with independently numbered parens. */
+#define TRIE 71 /* 0x47 Match many EXACT(F[ALU]?)? at once. flags==type */
+#define TRIEC 72 /* 0x48 Same as TRIE, but with embedded charclass data */
+#define AHOCORASICK 73 /* 0x49 Aho Corasick stclass. flags==type */
+#define AHOCORASICKC 74 /* 0x4a Same as AHOCORASICK, but with embedded charclass data */
+#define GOSUB 75 /* 0x4b recurse to paren arg1 at (signed) ofs arg2 */
+#define GOSTART 76 /* 0x4c recurse to start of pattern */
+#define NGROUPP 77 /* 0x4d Whether the group matched. */
+#define INSUBP 78 /* 0x4e Whether we are in a specific recurse. */
+#define DEFINEP 79 /* 0x4f Never execute directly. */
+#define ENDLIKE 80 /* 0x50 Used only for the type field of verbs */
+#define OPFAIL 81 /* 0x51 Same as (?!) */
+#define ACCEPT 82 /* 0x52 Accepts the current matched string. */
+#define VERB 83 /* 0x53 Used only for the type field of verbs */
+#define PRUNE 84 /* 0x54 Pattern fails at this startpoint if no-backtracking through this */
+#define MARKPOINT 85 /* 0x55 Push the current location for rollback by cut. */
+#define SKIP 86 /* 0x56 On failure skip forward (to the mark) before retrying */
+#define COMMIT 87 /* 0x57 Pattern fails outright if backtracking through this */
+#define CUTGROUP 88 /* 0x58 On failure go to the next alternation in the group */
+#define KEEPS 89 /* 0x59 $& begins here. */
+#define LNBREAK 90 /* 0x5a generic newline pattern */
+#define OPTIMIZED 91 /* 0x5b Placeholder for dump. */
+#define PSEUDO 92 /* 0x5c Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
@@ -171,7 +170,6 @@ EXTCONST U8 PL_regkind[] = {
NBOUND, /* NBOUNDA */
REG_ANY, /* REG_ANY */
REG_ANY, /* SANY */
- REG_ANY, /* CANY */
ANYOF, /* ANYOF */
ANYOF, /* ANYOFL */
POSIXD, /* POSIXD */
@@ -313,7 +311,6 @@ static const U8 regarglen[] = {
0, /* NBOUNDA */
0, /* REG_ANY */
0, /* SANY */
- 0, /* CANY */
EXTRA_SIZE(struct regnode_1), /* ANYOF */
EXTRA_SIZE(struct regnode_1), /* ANYOFL */
0, /* POSIXD */
@@ -412,7 +409,6 @@ static const char reg_off_by_arg[] = {
0, /* NBOUNDA */
0, /* REG_ANY */
0, /* SANY */
- 0, /* CANY */
0, /* ANYOF */
0, /* ANYOFL */
0, /* POSIXD */
@@ -516,82 +512,81 @@ EXTCONST char * const PL_reg_name[] = {
"NBOUNDA", /* 0x0f */
"REG_ANY", /* 0x10 */
"SANY", /* 0x11 */
- "CANY", /* 0x12 */
- "ANYOF", /* 0x13 */
- "ANYOFL", /* 0x14 */
- "POSIXD", /* 0x15 */
- "POSIXL", /* 0x16 */
- "POSIXU", /* 0x17 */
- "POSIXA", /* 0x18 */
- "NPOSIXD", /* 0x19 */
- "NPOSIXL", /* 0x1a */
- "NPOSIXU", /* 0x1b */
- "NPOSIXA", /* 0x1c */
- "CLUMP", /* 0x1d */
- "BRANCH", /* 0x1e */
- "EXACT", /* 0x1f */
- "EXACTL", /* 0x20 */
- "EXACTF", /* 0x21 */
- "EXACTFL", /* 0x22 */
- "EXACTFU", /* 0x23 */
- "EXACTFA", /* 0x24 */
- "EXACTFU_SS", /* 0x25 */
- "EXACTFLU8", /* 0x26 */
- "EXACTFA_NO_TRIE", /* 0x27 */
- "NOTHING", /* 0x28 */
- "TAIL", /* 0x29 */
- "STAR", /* 0x2a */
- "PLUS", /* 0x2b */
- "CURLY", /* 0x2c */
- "CURLYN", /* 0x2d */
- "CURLYM", /* 0x2e */
- "CURLYX", /* 0x2f */
- "WHILEM", /* 0x30 */
- "OPEN", /* 0x31 */
- "CLOSE", /* 0x32 */
- "REF", /* 0x33 */
- "REFF", /* 0x34 */
- "REFFL", /* 0x35 */
- "REFFU", /* 0x36 */
- "REFFA", /* 0x37 */
- "NREF", /* 0x38 */
- "NREFF", /* 0x39 */
- "NREFFL", /* 0x3a */
- "NREFFU", /* 0x3b */
- "NREFFA", /* 0x3c */
- "LONGJMP", /* 0x3d */
- "BRANCHJ", /* 0x3e */
- "IFMATCH", /* 0x3f */
- "UNLESSM", /* 0x40 */
- "SUSPEND", /* 0x41 */
- "IFTHEN", /* 0x42 */
- "GROUPP", /* 0x43 */
- "EVAL", /* 0x44 */
- "MINMOD", /* 0x45 */
- "LOGICAL", /* 0x46 */
- "RENUM", /* 0x47 */
- "TRIE", /* 0x48 */
- "TRIEC", /* 0x49 */
- "AHOCORASICK", /* 0x4a */
- "AHOCORASICKC", /* 0x4b */
- "GOSUB", /* 0x4c */
- "GOSTART", /* 0x4d */
- "NGROUPP", /* 0x4e */
- "INSUBP", /* 0x4f */
- "DEFINEP", /* 0x50 */
- "ENDLIKE", /* 0x51 */
- "OPFAIL", /* 0x52 */
- "ACCEPT", /* 0x53 */
- "VERB", /* 0x54 */
- "PRUNE", /* 0x55 */
- "MARKPOINT", /* 0x56 */
- "SKIP", /* 0x57 */
- "COMMIT", /* 0x58 */
- "CUTGROUP", /* 0x59 */
- "KEEPS", /* 0x5a */
- "LNBREAK", /* 0x5b */
- "OPTIMIZED", /* 0x5c */
- "PSEUDO", /* 0x5d */
+ "ANYOF", /* 0x12 */
+ "ANYOFL", /* 0x13 */
+ "POSIXD", /* 0x14 */
+ "POSIXL", /* 0x15 */
+ "POSIXU", /* 0x16 */
+ "POSIXA", /* 0x17 */
+ "NPOSIXD", /* 0x18 */
+ "NPOSIXL", /* 0x19 */
+ "NPOSIXU", /* 0x1a */
+ "NPOSIXA", /* 0x1b */
+ "CLUMP", /* 0x1c */
+ "BRANCH", /* 0x1d */
+ "EXACT", /* 0x1e */
+ "EXACTL", /* 0x1f */
+ "EXACTF", /* 0x20 */
+ "EXACTFL", /* 0x21 */
+ "EXACTFU", /* 0x22 */
+ "EXACTFA", /* 0x23 */
+ "EXACTFU_SS", /* 0x24 */
+ "EXACTFLU8", /* 0x25 */
+ "EXACTFA_NO_TRIE", /* 0x26 */
+ "NOTHING", /* 0x27 */
+ "TAIL", /* 0x28 */
+ "STAR", /* 0x29 */
+ "PLUS", /* 0x2a */
+ "CURLY", /* 0x2b */
+ "CURLYN", /* 0x2c */
+ "CURLYM", /* 0x2d */
+ "CURLYX", /* 0x2e */
+ "WHILEM", /* 0x2f */
+ "OPEN", /* 0x30 */
+ "CLOSE", /* 0x31 */
+ "REF", /* 0x32 */
+ "REFF", /* 0x33 */
+ "REFFL", /* 0x34 */
+ "REFFU", /* 0x35 */
+ "REFFA", /* 0x36 */
+ "NREF", /* 0x37 */
+ "NREFF", /* 0x38 */
+ "NREFFL", /* 0x39 */
+ "NREFFU", /* 0x3a */
+ "NREFFA", /* 0x3b */
+ "LONGJMP", /* 0x3c */
+ "BRANCHJ", /* 0x3d */
+ "IFMATCH", /* 0x3e */
+ "UNLESSM", /* 0x3f */
+ "SUSPEND", /* 0x40 */
+ "IFTHEN", /* 0x41 */
+ "GROUPP", /* 0x42 */
+ "EVAL", /* 0x43 */
+ "MINMOD", /* 0x44 */
+ "LOGICAL", /* 0x45 */
+ "RENUM", /* 0x46 */
+ "TRIE", /* 0x47 */
+ "TRIEC", /* 0x48 */
+ "AHOCORASICK", /* 0x49 */
+ "AHOCORASICKC", /* 0x4a */
+ "GOSUB", /* 0x4b */
+ "GOSTART", /* 0x4c */
+ "NGROUPP", /* 0x4d */
+ "INSUBP", /* 0x4e */
+ "DEFINEP", /* 0x4f */
+ "ENDLIKE", /* 0x50 */
+ "OPFAIL", /* 0x51 */
+ "ACCEPT", /* 0x52 */
+ "VERB", /* 0x53 */
+ "PRUNE", /* 0x54 */
+ "MARKPOINT", /* 0x55 */
+ "SKIP", /* 0x56 */
+ "COMMIT", /* 0x57 */
+ "CUTGROUP", /* 0x58 */
+ "KEEPS", /* 0x59 */
+ "LNBREAK", /* 0x5a */
+ "OPTIMIZED", /* 0x5b */
+ "PSEUDO", /* 0x5c */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
@@ -695,7 +690,6 @@ EXTCONST char * const PL_reg_intflags_name[] = {
"CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
"USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
"NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
- "CANY_SEEN", /* 0x00000080 - PREGf_CANY_SEEN */
"GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
"GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
"ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
@@ -705,7 +699,7 @@ EXTCONST char * const PL_reg_intflags_name[] = {
#endif /* DOINIT */
#ifdef DEBUGGING
-# define REG_INTFLAGS_NAME_SIZE 13
+# define REG_INTFLAGS_NAME_SIZE 12
#endif
/* The following have no fixed length. U8 so we can do strchr() on it. */
@@ -726,7 +720,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
EXTCONST U8 PL_varies_bitmask[];
#else
EXTCONST U8 PL_varies_bitmask[] = {
- 0x00, 0x00, 0x00, 0x60, 0x00, 0xFC, 0xF9, 0x5F, 0x06, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0x00, 0x30, 0x00, 0xFE, 0xFC, 0x2F, 0x03, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
@@ -738,8 +732,8 @@ EXTCONST U8 PL_varies_bitmask[] = {
EXTCONST U8 PL_simple[] __attribute__deprecated__;
#else
EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
- REG_ANY, SANY, CANY, ANYOF, ANYOFL, POSIXD, POSIXL, POSIXU, POSIXA,
- NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA,
+ REG_ANY, SANY, ANYOF, ANYOFL, POSIXD, POSIXL, POSIXU, POSIXA, NPOSIXD,
+ NPOSIXL, NPOSIXU, NPOSIXA,
0
};
#endif /* DOINIT */
@@ -748,7 +742,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
EXTCONST U8 PL_simple_bitmask[];
#else
EXTCONST U8 PL_simple_bitmask[] = {
- 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
diff --git a/t/lib/Cname.pm b/t/lib/Cname.pm
index 4a1bc16d85..dad356ae66 100644
--- a/t/lib/Cname.pm
+++ b/t/lib/Cname.pm
@@ -24,16 +24,6 @@ sub translator {
if ( $str eq 'TOO-LONG-STR') {
return 'A' x 256;
}
- if ($str eq 'MALFORMED') {
- $str = "\xDF\xDFabc";
- utf8::upgrade($str);
-
- no warnings 'deprecated';
-
- # Create a malformed in first and second characters.
- $str =~ s/^\C/A/;
- $str =~ s/^(\C\C)\C/$1A/;
- }
return $str;
}
diff --git a/t/op/bop.t b/t/op/bop.t
index 09f2be9ab9..8acd3b2afd 100644
--- a/t/op/bop.t
+++ b/t/op/bop.t
@@ -15,7 +15,7 @@ BEGIN {
# If you find tests are failing, please try adding names to tests to track
# down where the failure is, and supply your new names as a patch.
# (Just-in-time test naming)
-plan tests => 194 + (10*13*2) + 5;
+plan tests => 192 + (10*13*2) + 5;
# numerics
ok ((0xdead & 0xbeef) == 0x9ead);
@@ -430,40 +430,6 @@ SKIP: {
is($b, chr(0x1FE) x 0x0FF . chr(0x101) x 2);
}
-# update to pp_complement() via Coverity
-SKIP: {
- # UTF-EBCDIC is limited to 0x7fffffff and can't encode ~0.
- skip "Complements exceed maximum representable on EBCDIC ", 2 if $::IS_EBCDIC;
-
- my $str = "\x{10000}\x{800}";
- # U+10000 is four bytes in UTF-8/UTF-EBCDIC.
- # U+0800 is three bytes in UTF-8/UTF-EBCDIC.
-
- no warnings "utf8";
- {
- use bytes;
- no warnings 'deprecated';
- $str =~ s/\C\C\z//;
- }
-
- # it's really bogus that (~~malformed) is \0.
- my $ref = "\x{10000}\0";
- is(~~$str, $ref);
-
- # same test, but this time with a longer replacement string that
- # exercises a different branch in pp_subsr()
-
- $str = "\x{10000}\x{800}";
- {
- use bytes;
- no warnings 'deprecated';
- $str =~ s/\C\C\z/\0\0\0/;
- }
-
- # it's also bogus that (~~malformed) is \0\0\0\0.
- my $ref = "\x{10000}\0\0\0\0";
- is(~~$str, $ref, "use bytes with long replacement");
-}
# New string- and number-specific bitwise ops
{
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index 891bb66061..230fd891a6 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -28,57 +28,6 @@ run_tests() unless caller;
sub run_tests {
{
- no warnings 'deprecated';
-
- my $message = '\C matches octet';
- $_ = "a\x{100}b";
- ok(/(.)(\C)(\C)(.)/, $message);
- is($1, "a", $message);
- if ($::IS_ASCII) { # ASCII (or equivalent), should be UTF-8
- is($2, "\xC4", $message);
- is($3, "\x80", $message);
- }
- elsif ($::IS_EBCDIC) { # EBCDIC (or equivalent), should be UTF-EBCDIC
- is($2, "\x8C", $message);
- is($3, "\x41", $message);
- }
- else {
- SKIP: {
- ok 0, "Unexpected platform", "ord ('A') =" . ord 'A';
- skip "Unexpected platform";
- }
- }
- is($4, "b", $message);
- }
-
- {
- no warnings 'deprecated';
-
- my $message = '\C matches octet';
- $_ = "\x{100}";
- ok(/(\C)/g, $message);
- if ($::IS_ASCII) {
- is($1, "\xC4", $message);
- }
- elsif ($::IS_EBCDIC) {
- is($1, "\x8C", $message);
- }
- else {
- ok 0, "Unexpected platform", "ord ('A') = " . ord 'A';
- }
- ok(/(\C)/g, $message);
- if ($::IS_ASCII) {
- is($1, "\x80", $message);
- }
- elsif ($::IS_EBCDIC) {
- is($1, "\x41", $message);
- }
- else {
- ok 0, "Unexpected platform", "ord ('A') = " . ord 'A';
- }
- }
-
- {
# Japhy -- added 03/03/2001
() = (my $str = "abc") =~ /(...)/;
$str = "def";
@@ -284,24 +233,6 @@ sub run_tests {
}
{
- no warnings 'deprecated';
-
- my $message = '. matches \n with /s';
- my $str1 = "foo\nbar";
- my $str2 = "foo\n\x{100}bar";
- my ($a, $b) = map {chr} $::IS_ASCII ? (0xc4, 0x80) : (0x8c, 0x41);
- my @a;
- @a = $str1 =~ /./g; is(@a, 6, $message); is("@a", "f o o b a r", $message);
- @a = $str1 =~ /./gs; is(@a, 7, $message); is("@a", "f o o \n b a r", $message);
- @a = $str1 =~ /\C/g; is(@a, 7, $message); is("@a", "f o o \n b a r", $message);
- @a = $str1 =~ /\C/gs; is(@a, 7, $message); is("@a", "f o o \n b a r", $message);
- @a = $str2 =~ /./g; is(@a, 7, $message); is("@a", "f o o \x{100} b a r", $message);
- @a = $str2 =~ /./gs; is(@a, 8, $message); is("@a", "f o o \n \x{100} b a r", $message);
- @a = $str2 =~ /\C/g; is(@a, 9, $message); is("@a", "f o o \n $a $b b a r", $message);
- @a = $str2 =~ /\C/gs; is(@a, 9, $message); is("@a", "f o o \n $a $b b a r", $message);
- }
-
- {
no warnings 'digit';
# Check that \x## works. 5.6.1 and 5.005_03 fail some of these.
my $x;
@@ -492,11 +423,6 @@ sub run_tests {
=~ /^(\X)!/ &&
$1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}", $message);
- no warnings 'deprecated';
-
- $message = '\C and \X';
- like("!abc!", qr/a\Cc/, $message);
- like("!abc!", qr/a\Xc/, $message);
}
{
@@ -552,13 +478,6 @@ sub run_tests {
$& eq "Francais", $message);
ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran.ais/ &&
$& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", $message);
- {
- no warnings 'deprecated';
- ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Cais/ &&
- $& eq "Francais", $message);
- # COMBINING CEDILLA is two bytes when encoded
- like("Franc\N{COMBINING CEDILLA}ais", qr/Franc\C\Cais/, $message);
- }
ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Xais/ &&
$& eq "Francais", $message);
ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran\Xais/ &&
@@ -1114,8 +1033,6 @@ sub run_tests {
# differently
undef $w;
eval q [ok "\N{TOO-LONG-STR}" =~ /^\N{TOO-LONG-STR}$/, 'Verify that what once was too long a string works'];
- eval 'q(syntax error) =~ /\N{MALFORMED}/';
- ok $@ && $@ =~ /Malformed/, 'Verify that malformed utf8 gives an error';
eval 'q() =~ /\N{4F}/';
ok $@ && $@ =~ /Invalid character/, 'Verify that leading digit in name gives error';
eval 'q() =~ /\N{COM,MA}/';
diff --git a/t/re/pat_rt_report.t b/t/re/pat_rt_report.t
index ed8fafcc78..f35e72c35f 100644
--- a/t/re/pat_rt_report.t
+++ b/t/re/pat_rt_report.t
@@ -20,7 +20,7 @@ use warnings;
use 5.010;
use Config;
-plan tests => 2532; # Update this when adding/deleting tests.
+plan tests => 2500; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -89,13 +89,6 @@ sub run_tests {
}
{
- no warnings 'deprecated';
- my $message = '\C and É; Bug 20001230.002';
- ok("École" =~ /^\C\C(.)/ && $1 eq 'c', $message);
- like("École", qr/^\C\C(c)/, $message);
- }
-
- {
# The original bug report had 'no utf8' here but that was irrelevant.
my $message = "Don't dump core; Bug 20010306.008";
@@ -233,59 +226,6 @@ sub run_tests {
}
{
- our $a = "x\x{100}";
- chop $a; # Leaves the UTF-8 flag
- $a .= "y"; # 1 byte before 'y'.
-
- no warnings 'deprecated';
-
- like($a, qr/^\C/, 'match one \C on 1-byte UTF-8; Bug 15763');
- like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763');
-
- like($a, qr/^\Cy/, 'match \Cy; Bug 15763');
- like($a, qr/^\C{1}y/, 'match \C{1}y; Bug 15763');
-
- unlike($a, qr/^\C\Cy/, q {don't match two \Cy; Bug 15763});
- unlike($a, qr/^\C{2}y/, q {don't match \C{2}y; Bug 15763});
-
- $a = "\x{100}y"; # 2 bytes before "y"
-
- like($a, qr/^\C/, 'match one \C on 2-byte UTF-8; Bug 15763');
- like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763');
- like($a, qr/^\C\C/, 'match two \C; Bug 15763');
- like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763');
-
- like($a, qr/^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte; Bug 15763');
- like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763');
-
- like($a, qr/^\C\Cy/, 'match two \C; Bug 15763');
- like($a, qr/^\C{2}y/, 'match \C{2}; Bug 15763');
-
- unlike($a, qr/^\C\C\Cy/, q {don't match three \Cy; Bug 15763});
- unlike($a, qr/^\C{2}\Cy/, q {don't match \C{2}\Cy; Bug 15763});
- unlike($a, qr/^\C{3}y/, q {don't match \C{3}y; Bug 15763});
-
- $a = "\x{1000}y"; # 3 bytes before "y"
-
- like($a, qr/^\C/, 'match one \C on three-byte UTF-8; Bug 15763');
- like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763');
- like($a, qr/^\C\C/, 'match two \C; Bug 15763');
- like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763');
- like($a, qr/^\C\C\C/, 'match three \C; Bug 15763');
- like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763');
-
- like($a, qr/^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte; Bug 15763');
- like($a, qr/^\C{4}/, 'match \C{4}; Bug 15763');
-
- like($a, qr/^\C\C\Cy/, 'match three \Cy; Bug 15763');
- like($a, qr/^\C{3}y/, 'match \C{3}y; Bug 15763');
-
- unlike($a, qr/^\C\C\C\Cy/, q {don't match four \Cy; Bug 15763});
- unlike($a, qr/^\C{4}y/, q {don't match \C{4}y; Bug 15763});
- }
-
-
- {
my $message = 'UTF-8 matching; Bug 15397';
like("\x{100}", qr/\x{100}/, $message);
like("\x{100}", qr/(\x{100})/, $message);
@@ -1173,13 +1113,6 @@ EOP
# in the report above that only happened in a thread.
my $s = "\x{1ff}" . "f" x 32;
ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap");
-
- # this one segfaulted under the conditions above
- # of course, CANY is evil, maybe it should crash
- {
- no warnings 'deprecated';
- ok($s =~ /.\C+/, "CANY pointer wrap");
- }
}
} # End of sub run_tests