diff options
-rw-r--r-- | charclass_invlists.h | 14 | ||||
-rw-r--r-- | embedvar.h | 1 | ||||
-rw-r--r-- | intrpvar.h | 1 | ||||
-rw-r--r-- | regcomp.c | 5 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 9 | ||||
-rw-r--r-- | sv.c | 1 |
6 files changed, 28 insertions, 3 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index f3de65ab7a..c2f9f248b1 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -772,4 +772,18 @@ static const UV _Perl_Multi_Char_Folds_invlist[] = { #endif +#ifndef PERL_IN_XSUB_RE + +static const UV UpperLatin1_invlist[] = { + 3, /* Number of elements */ + 148565664, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, + 128, + 256 +}; + +#endif + /* ex: set ro: */ diff --git a/embedvar.h b/embedvar.h index 75a0cc9237..2349c8c978 100644 --- a/embedvar.h +++ b/embedvar.h @@ -66,6 +66,7 @@ #define PL_Sock (vTHX->ISock) #define PL_StdIO (vTHX->IStdIO) #define PL_Sv (vTHX->ISv) +#define PL_UpperLatin1 (vTHX->IUpperLatin1) #define PL_XPosix_ptrs (vTHX->IXPosix_ptrs) #define PL_Xpv (vTHX->IXpv) #define PL_an (vTHX->Ian) diff --git a/intrpvar.h b/intrpvar.h index f4efc329b1..9cfe2c639d 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -578,6 +578,7 @@ PERLVAR(I, numeric_radix_sv, SV *) /* The radix separator if not '.' */ /* Unicode inversion lists */ PERLVAR(I, ASCII, SV *) PERLVAR(I, Latin1, SV *) +PERLVAR(I, UpperLatin1, SV *) /* Code points 128 - 255 */ PERLVAR(I, AboveLatin1, SV *) PERLVAR(I, NonL1NonFinalFold, SV *) @@ -5547,6 +5547,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist); PL_ASCII = _new_invlist_C_array(ASCII_invlist); PL_Latin1 = _new_invlist_C_array(Latin1_invlist); + PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist); PL_L1Posix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(L1PosixAlnum_invlist); @@ -13755,10 +13756,8 @@ parseit: /* Under /d, we put into a separate list the Latin1 things that * match only when the target string is utf8 */ SV* nonascii_but_latin1_properties = NULL; - _invlist_intersection(posixes, PL_Latin1, + _invlist_intersection(posixes, PL_UpperLatin1, &nonascii_but_latin1_properties); - _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII, - &nonascii_but_latin1_properties); _invlist_subtract(posixes, nonascii_but_latin1_properties, &posixes); if (cp_list) { diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index b857b10580..954e71412a 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -121,6 +121,14 @@ sub _Perl_Non_Final_Folds { return mk_invlist_from_cp_list(\@is_non_final_fold); } +sub UpperLatin1 { + my @upper_latin1; + for my $i (0 .. 255) { # Complicated because of EBCDIC + push @upper_latin1, $i if chr($i) =~ /[[:^ascii:]]/; + } + return mk_invlist_from_cp_list(\@upper_latin1); +} + output_invlist("Latin1", [ 0, 256 ]); output_invlist("AboveLatin1", [ 256 ]); @@ -180,6 +188,7 @@ for my $prop (qw( XPosixXDigit &NonL1_Perl_Non_Final_Folds &_Perl_Multi_Char_Folds + &UpperLatin1 ) ) { @@ -13712,6 +13712,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* Unicode inversion lists */ PL_ASCII = sv_dup_inc(proto_perl->IASCII, param); PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); + PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param); PL_NonL1NonFinalFold = sv_dup_inc(proto_perl->INonL1NonFinalFold, param); |