summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-03-28 19:28:04 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-05-22 08:24:17 -0600
commit: 075b9d7d9a6d4473b240a047655e507c8baa6db3 (patch)
tree: 0920fbe993831debfd6dd1588a8c7c975114746e
parent: c6e8e4a93db00ba34769d5e055b9618f2779e205 (diff)
download: perl-075b9d7d9a6d4473b240a047655e507c8baa6db3.tar.gz
Experimentally add VT to \s definition
This commit is the minimal change necessary to get \s to match the vertical tab. It is being done early in the 5.17 series in order to see what repercussions there might be from doing this. It may well be that we decide that this change will require a 'use feature' to activate. In any event, there is a significant amount of documentation describing the behavior without the VT, and this patch does not address any of it. Tom Christiansen asked Larry Wall why \s did not include VT, and reported that Larry replied that he did not remember, but had no objections to adding it.
-rw-r--r--  charclass_invlists.h           8
-rw-r--r--  l1_char_class_tab.h            2
-rw-r--r--  lib/feature/unicode_strings.t  1
-rw-r--r--  lib/unicore/mktables           5
-rw-r--r--  pod/perldelta.pod              8
-rw-r--r--  t/re/pat.t                     2
6 files changed, 16 insertions, 10 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 368410fd37..27f7e827e1 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -75,28 +75,24 @@ UV VertSpace_invlist[] = {
};
UV PerlSpace_invlist[] = {
- 6, /* Number of elements */
+ 4, /* Number of elements */
0, /* Current iteration position */
1064334010, /* Version and data structure type */
1, /* 0 if this is the first element of the list proper;
1 if the next element is the first */
9,
- 11,
- 12,
14,
32,
33
};
UV XPerlSpace_invlist[] = {
- 24, /* Number of elements */
+ 22, /* Number of elements */
0, /* Current iteration position */
1064334010, /* Version and data structure type */
1, /* 0 if this is the first element of the list proper;
1 if the next element is the first */
9,
- 11,
- 12,
14,
32,
33,
diff --git a/l1_char_class_tab.h b/l1_char_class_tab.h
index d649c89d8a..3698d95e02 100644
--- a/l1_char_class_tab.h
+++ b/l1_char_class_tab.h
@@ -16,7 +16,7 @@
/* U+08 BS */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_QUOTEMETA,
/* U+09 HT */ _CC_BLANK_A|_CC_BLANK_L1|_CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA,
/* U+0A LF */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA,
-/* U+0B VT */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_QUOTEMETA,
+/* U+0B VT */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA,
/* U+0C FF */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA,
/* U+0D CR */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_PSXSPC_A|_CC_PSXSPC_L1|_CC_SPACE_A|_CC_SPACE_L1|_CC_QUOTEMETA,
/* U+0E SO */ _CC_CNTRL_A|_CC_CNTRL_L1|_CC_QUOTEMETA,
diff --git a/lib/feature/unicode_strings.t b/lib/feature/unicode_strings.t
index 7e557b2bc7..8bd536f258 100644
--- a/lib/feature/unicode_strings.t
+++ b/lib/feature/unicode_strings.t
@@ -172,6 +172,7 @@ for my $i ( 0x30 .. 0x39, # 0-9
my @s = (0) x 256;
$s[ord_latin1_to_native 0x09] = 1; # Tab
$s[ord_latin1_to_native 0x0A] = 1; # LF
+$s[ord_latin1_to_native 0x0B] = 1; # VT
$s[ord_latin1_to_native 0x0C] = 1; # FF
$s[ord_latin1_to_native 0x0D] = 1; # CR
$s[ord_latin1_to_native 0x20] = 1; # SPACE
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index b4d980b85a..ab029e48ed 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -12521,11 +12521,12 @@ sub compile_perl() {
# Perl's traditional space doesn't include Vertical Tab
my $XPerlSpace = $perl->add_match_table('XPerlSpace',
Description => '\s, including beyond ASCII',
- Initialize => $Space - 0x000B,
+ #Initialize => $Space - 0x000B,
+ Initialize => $Space,
);
$XPerlSpace->add_alias('SpacePerl'); # A pre-existing synonym
my $PerlSpace = $perl->add_match_table('PerlSpace',
- Description => '\s, restricted to ASCII = [ \f\n\r\t]',
+ Description => '\s, restricted to ASCII = [ \f\n\r\t] plus vertical tab',
Initialize => $XPerlSpace & $ASCII,
);
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 1264cbeb61..abfc4984fa 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -46,6 +46,14 @@ XXX For a release on a stable branch, this section aspires to be:
[ List each incompatible change as a =head2 entry ]
+=head2 C<\s> in regular expressions now matches a Vertical Tab (experimental)
+
+This is an experiment early in the development cycle to see what
+repercussions arise from this change. It may well be that we decide
+to require a C<"use feature"> to activate this behavior. Because
+of the experimental nature of this, which may be reversed, the
+documentation has not been changed to reflect it.
+
=head1 Deprecations
XXX Any deprecated features, syntax, modules etc. should be listed here.
diff --git a/t/re/pat.t b/t/re/pat.t
index 882368e2a7..b34e0930ab 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -985,7 +985,7 @@ sub run_tests {
my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space;
my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space;
- is("@space0", "cr ff lf spc tab", $message);
+ is("@space0", "cr ff lf spc tab vt", $message);
is("@space1", "cr ff lf spc tab vt", $message);
is("@space2", "spc tab", $message);
}