diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-03-28 19:28:04 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-05-22 08:24:17 -0600 |
commit | 075b9d7d9a6d4473b240a047655e507c8baa6db3 (patch) | |
tree | 0920fbe993831debfd6dd1588a8c7c975114746e | |
parent | c6e8e4a93db00ba34769d5e055b9618f2779e205 (diff) | |
download | perl-075b9d7d9a6d4473b240a047655e507c8baa6db3.tar.gz |
Experimentally add VT to \s definition
This commit is the minimum necessary to get \s to match the vertical
tab. It is being done early in the 5.17 series in order to see what
repercussions there might be from doing this.
It may well be that we decide that this change will require a 'use
feature' to activate. In any event there is significant documentation
of the behavior without the VT that this patch does not address at all.
Tom Christiansen asked Larry Wall why \s did not include VT, and
reported that Larry replied that he did not remember, but had no
objections to adding it.
-rw-r--r-- | charclass_invlists.h | 8 | ||||
-rw-r--r-- | l1_char_class_tab.h | 2 | ||||
-rw-r--r-- | lib/feature/unicode_strings.t | 1 | ||||
-rw-r--r-- | lib/unicore/mktables | 5 | ||||
-rw-r--r-- | pod/perldelta.pod | 8 | ||||
-rw-r--r-- | t/re/pat.t | 2 |
6 files changed, 16 insertions, 10 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 368410fd37..27f7e827e1 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -75,28 +75,24 @@ UV VertSpace_invlist[] = { }; UV PerlSpace_invlist[] = { - 6, /* Number of elements */ + 4, /* Number of elements */ 0, /* Current iteration position */ 1064334010, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, - 11, - 12, 14, 32, 33 }; UV XPerlSpace_invlist[] = { - 24, /* Number of elements */ + 22, /* Number of elements */ 0, /* Current iteration position */ 1064334010, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, - 11, - 12, 14, 32, 33, diff --git a/l1_char_class_tab.h b/l1_char_class_tab.h index d649c89d8a..3698d95e02 100644 --- a/l1_char_class_tab.h +++ b/l1_char_class_tab.h @@ -16,7 +16,7 @@ /* U+08 BS */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_QUOTEMETA, /* U+09 HT */ _CC_BLANK_A|_CC_BLANK_L1|_CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA, /* U+0A LF */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA, -/* U+0B VT */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_QUOTEMETA, +/* U+0B VT */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA, /* U+0C FF */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA, /* U+0D CR */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA, /* U+0E SO */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_QUOTEMETA, diff --git a/lib/feature/unicode_strings.t b/lib/feature/unicode_strings.t index 7e557b2bc7..8bd536f258 100644 --- a/lib/feature/unicode_strings.t +++ b/lib/feature/unicode_strings.t @@ -172,6 +172,7 @@ for my $i ( 0x30 .. 
0x39, # 0-9 my @s = (0) x 256; $s[ord_latin1_to_native 0x09] = 1; # Tab $s[ord_latin1_to_native 0x0A] = 1; # LF +$s[ord_latin1_to_native 0x0B] = 1; # VT $s[ord_latin1_to_native 0x0C] = 1; # FF $s[ord_latin1_to_native 0x0D] = 1; # CR $s[ord_latin1_to_native 0x20] = 1; # SPACE diff --git a/lib/unicore/mktables b/lib/unicore/mktables index b4d980b85a..ab029e48ed 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -12521,11 +12521,12 @@ sub compile_perl() { # Perl's traditional space doesn't include Vertical Tab my $XPerlSpace = $perl->add_match_table('XPerlSpace', Description => '\s, including beyond ASCII', - Initialize => $Space - 0x000B, + #Initialize => $Space - 0x000B, + Initialize => $Space, ); $XPerlSpace->add_alias('SpacePerl'); # A pre-existing synonym my $PerlSpace = $perl->add_match_table('PerlSpace', - Description => '\s, restricted to ASCII = [ \f\n\r\t]', + Description => '\s, restricted to ASCII = [ \f\n\r\t] plus vertical tab', Initialize => $XPerlSpace & $ASCII, ); diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 1264cbeb61..abfc4984fa 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -46,6 +46,14 @@ XXX For a release on a stable branch, this section aspires to be: [ List each incompatible change as a =head2 entry ] +=head2 C<\s> in regular expressions now matches a Vertical Tab (experimental) + +This is an experiment early in the development cycle to see what +repercussions arise from this change. It may well be that we decide +to require a C<"use feature"> to activate this behavior. Because +of the experimental nature of this, which may be reversed, the +documentation has not been changed to reflect it. + =head1 Deprecations XXX Any deprecated features, syntax, modules etc. should be listed here. 
diff --git a/t/re/pat.t b/t/re/pat.t index 882368e2a7..b34e0930ab 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -985,7 +985,7 @@ sub run_tests { my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; - is("@space0", "cr ff lf spc tab", $message); + is("@space0", "cr ff lf spc tab vt", $message); is("@space1", "cr ff lf spc tab vt", $message); is("@space2", "spc tab", $message); } |