summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- MANIFEST 2
-rw-r--r-- lib/unicode/Is/Blank.pl 12
-rw-r--r-- lib/unicode/Is/SpacePerl.pl 14
-rwxr-xr-x lib/unicode/mktables.PL 5
-rw-r--r-- pod/perlre.pod 6
-rw-r--r-- regcomp.c 15
-rw-r--r-- regexec.c 2
-rwxr-xr-x t/op/pat.t 6
-rw-r--r-- utf8.c 2
9 files changed, 50 insertions, 14 deletions
diff --git a/MANIFEST b/MANIFEST
index 4607250398..7da209ef0f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -922,6 +922,7 @@ lib/unicode/Is/BidiRLE.pl Unicode character database
lib/unicode/Is/BidiRLO.pl Unicode character database
lib/unicode/Is/BidiS.pl Unicode character database
lib/unicode/Is/BidiWS.pl Unicode character database
+lib/unicode/Is/Blank.pl Unicode character database
lib/unicode/Is/C.pl Unicode character database
lib/unicode/Is/Cc.pl Unicode character database
lib/unicode/Is/Cf.pl Unicode character database
@@ -1010,6 +1011,7 @@ lib/unicode/Is/Sk.pl Unicode character database
lib/unicode/Is/Sm.pl Unicode character database
lib/unicode/Is/So.pl Unicode character database
lib/unicode/Is/Space.pl Unicode character database
+lib/unicode/Is/SpacePerl.pl Unicode character database
lib/unicode/Is/SylA.pl Unicode character database
lib/unicode/Is/SylAA.pl Unicode character database
lib/unicode/Is/SylAAI.pl Unicode character database
diff --git a/lib/unicode/Is/Blank.pl b/lib/unicode/Is/Blank.pl
new file mode 100644
index 0000000000..86429218e3
--- /dev/null
+++ b/lib/unicode/Is/Blank.pl
@@ -0,0 +1,12 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.301.
+# Any changes made here will be lost!
+return <<'END';
+0009
+0020
+00a0
+1680
+2000 200b
+202f
+3000
+END
diff --git a/lib/unicode/Is/SpacePerl.pl b/lib/unicode/Is/SpacePerl.pl
new file mode 100644
index 0000000000..2bb74dea8a
--- /dev/null
+++ b/lib/unicode/Is/SpacePerl.pl
@@ -0,0 +1,14 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.301.
+# Any changes made here will be lost!
+return <<'END';
+0009 000a
+000c 000d
+0020
+00a0
+1680
+2000 200b
+2028 2029
+202f
+3000
+END
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index d8b57b6a83..82b35ef97d 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -25,8 +25,13 @@ mkdir "To", 0755;
# 000B: VERTICAL TABULATION
# 000C: FORM FEED
# 000D: CARRIAGE RETURN
+ # 0020: SPACE
['IsSpace', '$cat =~ /^Z/ ||
$code =~ /^(0009|000A|000B|000C|000D)$/', ''],
+ ['IsSpacePerl',
+ '$cat =~ /^Z/ ||
+ $code =~ /^(0009|000A|000C|000D)$/', ''],
+ ['IsBlank', '$cat =~ /^Z[^lp]$/ || $code eq "0009"', ''],
['IsDigit', '$cat =~ /^Nd$/', ''],
['IsUpper', '$cat =~ /^L[ut]$/', ''],
['IsLower', '$cat =~ /^Ll$/', ''],
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 182f5bd03f..c5ecb13c40 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -225,19 +225,21 @@ whole character class. For example:
matches zero, one, any alphabetic character, and the percentage sign.
If the C<utf8> pragma is used, the following equivalences to Unicode
-\p{} constructs hold:
+\p{} constructs, and to the equivalent backslash character classes where
+available, hold:
alpha IsAlpha
alnum IsAlnum
ascii IsASCII
blank IsSpace
cntrl IsCntrl
- digit IsDigit
+ digit IsDigit \d
graph IsGraph
lower IsLower
print IsPrint
punct IsPunct
space IsSpace
+ IsSpacePerl \s
upper IsUpper
word IsWord
xdigit IsXDigit
diff --git a/regcomp.c b/regcomp.c
index 784e83e43b..3b4f481b1c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3705,7 +3705,7 @@ S_regclassutf8(pTHX_ RExC_state_t *pRExC_state)
flags |= ANYOF_FOLD;
if (LOC)
flags |= ANYOF_LOCALE;
- listsv = newSVpvn("# comment\n",10);
+ listsv = newSVpvn("# comment\n", 10);
}
if (!SIZE_ONLY && ckWARN(WARN_REGEXP))
@@ -3868,15 +3868,16 @@ S_regclassutf8(pTHX_ RExC_state_t *pRExC_state)
case ANYOF_NPUNCT:
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
case ANYOF_SPACE:
- case ANYOF_PSXSPC:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");break;
+ case ANYOF_NSPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");break;
case ANYOF_BLANK:
- /* Not very true for PSXSPC and BLANK
- * but not feeling like creating IsPOSIXSpace and
- * IsBlank right now. --jhi */
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n"); break;
+ case ANYOF_NBLANK:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n"); break;
+ case ANYOF_PSXSPC:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
- case ANYOF_NSPACE:
case ANYOF_NPSXSPC:
- case ANYOF_NBLANK:
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
case ANYOF_UPPER:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
diff --git a/regexec.c b/regexec.c
index 018c6c8963..18c06d553f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3773,7 +3773,7 @@ S_reginclass(pTHX_ register regnode *p, register I32 c)
(ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
(ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
(ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
- (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
+ (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
(ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
) /* How's that for a conditional? */
{
diff --git a/t/op/pat.t b/t/op/pat.t
index 8c3638c167..aaec39d259 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -1107,15 +1107,15 @@ my @space1 = sort grep { $space{$_} =~ /[[:space:]]/ } keys %space;
my @space2 = sort grep { $space{$_} =~ /[[:blank:]]/ } keys %space;
print "not " unless "@space0" eq "cr ff lf spc tab";
-print "ok $test\n";
+print "ok $test # @space0\n";
$test++;
print "not " unless "@space1" eq "cr ff lf spc tab vt";
-print "ok $test\n";
+print "ok $test # @space1\n";
$test++;
print "not " unless "@space2" eq "spc tab";
-print "ok $test\n";
+print "ok $test # @space2\n";
$test++;
# bugid 20001021.005 - this caused a SEGV
diff --git a/utf8.c b/utf8.c
index e313258574..9e943acb10 100644
--- a/utf8.c
+++ b/utf8.c
@@ -899,7 +899,7 @@ Perl_is_utf8_space(pTHX_ U8 *p)
if (!is_utf8_char(p))
return FALSE;
if (!PL_utf8_space)
- PL_utf8_space = swash_init("utf8", "IsSpace", &PL_sv_undef, 0, 0);
+ PL_utf8_space = swash_init("utf8", "IsSpacePerl", &PL_sv_undef, 0, 0);
return swash_fetch(PL_utf8_space, p);
}