diff options
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | lib/unicode/Is/Blank.pl | 12 | ||||
-rw-r--r-- | lib/unicode/Is/SpacePerl.pl | 14 | ||||
-rwxr-xr-x | lib/unicode/mktables.PL | 5 | ||||
-rw-r--r-- | pod/perlre.pod | 6 | ||||
-rw-r--r-- | regcomp.c | 15 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rwxr-xr-x | t/op/pat.t | 6 | ||||
-rw-r--r-- | utf8.c | 2 |
9 files changed, 50 insertions, 14 deletions
@@ -922,6 +922,7 @@ lib/unicode/Is/BidiRLE.pl Unicode character database lib/unicode/Is/BidiRLO.pl Unicode character database lib/unicode/Is/BidiS.pl Unicode character database lib/unicode/Is/BidiWS.pl Unicode character database +lib/unicode/Is/Blank.pl Unicode character database lib/unicode/Is/C.pl Unicode character database lib/unicode/Is/Cc.pl Unicode character database lib/unicode/Is/Cf.pl Unicode character database @@ -1010,6 +1011,7 @@ lib/unicode/Is/Sk.pl Unicode character database lib/unicode/Is/Sm.pl Unicode character database lib/unicode/Is/So.pl Unicode character database lib/unicode/Is/Space.pl Unicode character database +lib/unicode/Is/SpacePerl.pl Unicode character database lib/unicode/Is/SylA.pl Unicode character database lib/unicode/Is/SylAA.pl Unicode character database lib/unicode/Is/SylAAI.pl Unicode character database diff --git a/lib/unicode/Is/Blank.pl b/lib/unicode/Is/Blank.pl new file mode 100644 index 0000000000..86429218e3 --- /dev/null +++ b/lib/unicode/Is/Blank.pl @@ -0,0 +1,12 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.301. +# Any changes made here will be lost! +return <<'END'; +0009 +0020 +00a0 +1680 +2000 200b +202f +3000 +END diff --git a/lib/unicode/Is/SpacePerl.pl b/lib/unicode/Is/SpacePerl.pl new file mode 100644 index 0000000000..2bb74dea8a --- /dev/null +++ b/lib/unicode/Is/SpacePerl.pl @@ -0,0 +1,14 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.301. +# Any changes made here will be lost! +return <<'END'; +0009 000a +000c 000d +0020 +00a0 +1680 +2000 200b +2028 2029 +202f +3000 +END diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index d8b57b6a83..82b35ef97d 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -25,8 +25,13 @@ mkdir "To", 0755; # 000B: VERTICAL TABULATION # 000C: FORM FEED # 000D: CARRIAGE RETURN + # 0020: SPACE ['IsSpace', '$cat =~ /^Z/ || $code =~ /^(0009|000A|000B|000C|000D)$/', ''], + ['IsSpacePerl', + '$cat =~ /^Z/ || + $code =~ /^(0009|000A|000C|000D)$/', ''], + ['IsBlank', '$cat =~ /^Z[^lp]$/ || $code eq "0009"', ''], ['IsDigit', '$cat =~ /^Nd$/', ''], ['IsUpper', '$cat =~ /^L[ut]$/', ''], ['IsLower', '$cat =~ /^Ll$/', ''], diff --git a/pod/perlre.pod b/pod/perlre.pod index 182f5bd03f..c5ecb13c40 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -225,19 +225,21 @@ whole character class. For example: matches zero, one, any alphabetic character, and the percentage sign. If the C<utf8> pragma is used, the following equivalences to Unicode -\p{} constructs hold: +\p{} constructs and equivalent backslash character classes (if available), +will hold: alpha IsAlpha alnum IsAlnum ascii IsASCII blank IsSpace cntrl IsCntrl - digit IsDigit + digit IsDigit \d graph IsGraph lower IsLower print IsPrint punct IsPunct space IsSpace + IsSpacePerl \s upper IsUpper word IsWord xdigit IsXDigit @@ -3705,7 +3705,7 @@ S_regclassutf8(pTHX_ RExC_state_t *pRExC_state) flags |= ANYOF_FOLD; if (LOC) flags |= ANYOF_LOCALE; - listsv = newSVpvn("# comment\n",10); + listsv = newSVpvn("# comment\n", 10); } if (!SIZE_ONLY && ckWARN(WARN_REGEXP)) @@ -3868,15 +3868,16 @@ S_regclassutf8(pTHX_ RExC_state_t *pRExC_state) case ANYOF_NPUNCT: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; case ANYOF_SPACE: - case ANYOF_PSXSPC: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");break; + case ANYOF_NSPACE: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");break; case ANYOF_BLANK: - /* Not very true for PSXSPC and BLANK - * but not feeling like creating IsPOSIXSpace and - * IsBlank right now. --jhi */ + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n"); break; + case ANYOF_NBLANK: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n"); break; + case ANYOF_PSXSPC: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; - case ANYOF_NSPACE: case ANYOF_NPSXSPC: - case ANYOF_NBLANK: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; case ANYOF_UPPER: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; @@ -3773,7 +3773,7 @@ S_reginclass(pTHX_ register regnode *p, register I32 c) (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) || (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) || (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) || - (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) || + (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) || (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c)) ) /* How's that for a conditional? */ { diff --git a/t/op/pat.t b/t/op/pat.t index 8c3638c167..aaec39d259 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -1107,15 +1107,15 @@ my @space1 = sort grep { $space{$_} =~ /[[:space:]]/ } keys %space; my @space2 = sort grep { $space{$_} =~ /[[:blank:]]/ } keys %space; print "not " unless "@space0" eq "cr ff lf spc tab"; -print "ok $test\n"; +print "ok $test # @space0\n"; $test++; print "not " unless "@space1" eq "cr ff lf spc tab vt"; -print "ok $test\n"; +print "ok $test # @space1\n"; $test++; print "not " unless "@space2" eq "spc tab"; -print "ok $test\n"; +print "ok $test # @space2\n"; $test++; # bugid 20001021.005 - this caused a SEGV @@ -899,7 +899,7 @@ Perl_is_utf8_space(pTHX_ U8 *p) if (!is_utf8_char(p)) return FALSE; if (!PL_utf8_space) - PL_utf8_space = swash_init("utf8", "IsSpace", &PL_sv_undef, 0, 0); + PL_utf8_space = swash_init("utf8", "IsSpacePerl", &PL_sv_undef, 0, 0); return swash_fetch(PL_utf8_space, p); } |