summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--charclass_invlists.h14
-rw-r--r--embedvar.h1
-rw-r--r--intrpvar.h1
-rw-r--r--regcomp.c5
-rw-r--r--regen/mk_invlists.pl9
-rw-r--r--sv.c1
6 files changed, 28 insertions, 3 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index f3de65ab7a..c2f9f248b1 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -772,4 +772,18 @@ static const UV _Perl_Multi_Char_Folds_invlist[] = {
#endif
+#ifndef PERL_IN_XSUB_RE
+
+static const UV UpperLatin1_invlist[] = {
+ 3, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
+ 128,
+ 256
+};
+
+#endif
+
/* ex: set ro: */
diff --git a/embedvar.h b/embedvar.h
index 75a0cc9237..2349c8c978 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -66,6 +66,7 @@
#define PL_Sock (vTHX->ISock)
#define PL_StdIO (vTHX->IStdIO)
#define PL_Sv (vTHX->ISv)
+#define PL_UpperLatin1 (vTHX->IUpperLatin1)
#define PL_XPosix_ptrs (vTHX->IXPosix_ptrs)
#define PL_Xpv (vTHX->IXpv)
#define PL_an (vTHX->Ian)
diff --git a/intrpvar.h b/intrpvar.h
index f4efc329b1..9cfe2c639d 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -578,6 +578,7 @@ PERLVAR(I, numeric_radix_sv, SV *) /* The radix separator if not '.' */
/* Unicode inversion lists */
PERLVAR(I, ASCII, SV *)
PERLVAR(I, Latin1, SV *)
+PERLVAR(I, UpperLatin1, SV *) /* Code points 128 - 255 */
PERLVAR(I, AboveLatin1, SV *)
PERLVAR(I, NonL1NonFinalFold, SV *)
diff --git a/regcomp.c b/regcomp.c
index 2441a69cd9..49dd2462c7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5547,6 +5547,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
PL_ASCII = _new_invlist_C_array(ASCII_invlist);
PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
+ PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
PL_L1Posix_ptrs[_CC_ALPHANUMERIC]
= _new_invlist_C_array(L1PosixAlnum_invlist);
@@ -13755,10 +13756,8 @@ parseit:
/* Under /d, we put into a separate list the Latin1 things that
* match only when the target string is utf8 */
SV* nonascii_but_latin1_properties = NULL;
- _invlist_intersection(posixes, PL_Latin1,
+ _invlist_intersection(posixes, PL_UpperLatin1,
&nonascii_but_latin1_properties);
- _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
- &nonascii_but_latin1_properties);
_invlist_subtract(posixes, nonascii_but_latin1_properties,
&posixes);
if (cp_list) {
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index b857b10580..954e71412a 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -121,6 +121,14 @@ sub _Perl_Non_Final_Folds {
return mk_invlist_from_cp_list(\@is_non_final_fold);
}
+sub UpperLatin1 {
+ my @upper_latin1;
+ for my $i (0 .. 255) { # Complicated because of EBCDIC
+ push @upper_latin1, $i if chr($i) =~ /[[:^ascii:]]/;
+ }
+ return mk_invlist_from_cp_list(\@upper_latin1);
+}
+
output_invlist("Latin1", [ 0, 256 ]);
output_invlist("AboveLatin1", [ 256 ]);
@@ -180,6 +188,7 @@ for my $prop (qw(
XPosixXDigit
&NonL1_Perl_Non_Final_Folds
&_Perl_Multi_Char_Folds
+ &UpperLatin1
)
) {
diff --git a/sv.c b/sv.c
index f5326b4c0a..f3c0d2c4cd 100644
--- a/sv.c
+++ b/sv.c
@@ -13712,6 +13712,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
/* Unicode inversion lists */
PL_ASCII = sv_dup_inc(proto_perl->IASCII, param);
PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param);
+ PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param);
PL_AboveLatin1 = sv_dup_inc(proto_perl->IAboveLatin1, param);
PL_NonL1NonFinalFold = sv_dup_inc(proto_perl->INonL1NonFinalFold, param);