summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-08-23 10:30:02 -0600
committerKarl Williamson <khw@cpan.org>2015-08-24 12:11:33 -0600
commita0bd1a30d379f2625c307657d63fc50173d7a56d (patch)
tree7526c17ccb6fcaa679f4fe7cb4bbddf9a4eab466 /t
parent2d3d6e6e7c2d50b1cc47032cf089151823fb20a6 (diff)
downloadperl-a0bd1a30d379f2625c307657d63fc50173d7a56d.tar.gz
Make qr/(?[ ])/ work in UTF-8 locales
Previously use of this under /l regex rules was a compile time error. Now it works like \b{wb} and \b{sb}, which compile under locale rules and always work like Unicode says they should. A UTF-8 locale implies Unicode rules, and the goal is for it to work seamlessly with the rest of perl. This construct was the only one I am aware of that didn't work seamlessly (not counting OS interfaces) under UTF-8 LC_CTYPE locales. For all three of these constructs, use with a non-UTF-8 runtime locale raises a warning, and Unicode rules are used anyway. UTF-8 locale collation still has problems, but this is low priority to fix, as it's a lot of work, and if one really cares, one should be using Unicode::Collate.
Diffstat (limited to 't')
-rw-r--r--t/lib/warnings/regexec47
-rw-r--r--t/re/reg_mesg.t2
-rw-r--r--t/re/regex_sets.t41
3 files changed, 87 insertions, 3 deletions
diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec
index 750880e4db..1f3b65b167 100644
--- a/t/lib/warnings/regexec
+++ b/t/lib/warnings/regexec
@@ -212,3 +212,50 @@ Use of \b{} or \B{} for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at -
Use of \b{} or \B{} for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 16.
Use of \b{} or \B{} for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 17.
Use of \b{} or \B{} for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 17.
+########
+# NAME (?[ ]) in non-UTF-8 locale
+eval { require POSIX; POSIX->import("locale_h") };
+if ($@) {
+ print("SKIPPED\n# no POSIX\n"),exit;
+}
+no warnings 'experimental::regex_sets';
+use warnings 'locale';
+use locale;
+setlocale(&POSIX::LC_CTYPE, "C");
+"\N{KELVIN SIGN}" =~ /(?[ \N{KELVIN SIGN} ])/i;
+"K" =~ /(?[ \N{KELVIN SIGN} ])/i;
+"k" =~ /(?[ \N{KELVIN SIGN} ])/i;
+":" =~ /(?[ \: ])/;
+no warnings 'locale';
+EXPECT
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 9.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 9.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 10.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 10.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 11.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 11.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 12.
+Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale at - line 12.
+########
+# NAME (?[ ]) in UTF-8 locale
+require '../loc_tools.pl';
+unless (locales_enabled()) {
+ print("SKIPPED\n# locales not available\n"),exit;
+}
+eval { require POSIX; POSIX->import("locale_h") };
+if ($@) {
+ print("SKIPPED\n# no POSIX\n"),exit;
+}
+my $utf8_locale = find_utf8_ctype_locale();
+unless ($utf8_locale) {
+ print("SKIPPED\n# No UTF-8 locale available\n"),exit;
+}
+no warnings 'experimental::regex_sets';
+use warnings 'locale';
+use locale;
+setlocale(&POSIX::LC_CTYPE, $utf8_locale);
+"\N{KELVIN SIGN}" =~ /(?[ \N{KELVIN SIGN} ])/i;
+"K" =~ /(?[ \N{KELVIN SIGN} ])/i;
+"k" =~ /(?[ \N{KELVIN SIGN} ])/i;
+":" =~ /(?[ \: ])/;
+EXPECT
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index a0588241f3..d9d9d7437b 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -205,7 +205,6 @@ my @death =
'/(?[[[:w:]]])/' => "POSIX class [:w:] unknown {#} m/(?[[[:w:]{#}]])/",
'/(?[[:w:]])/' => "POSIX class [:w:] unknown {#} m/(?[[:w:]{#}])/",
'/(?[a])/' => 'Unexpected character {#} m/(?[a{#}])/',
- '/(?[\t])/l' => '(?[...]) not valid in locale {#} m/(?[{#}\t])/',
'/(?[ + \t ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/(?[ +{#} \t ])/',
'/(?[ \cK - ( + \t ) ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/(?[ \cK - ( +{#} \t ) ])/',
'/(?[ \cK ( \t ) ])/' => 'Unexpected \'(\' with no preceding operator {#} m/(?[ \cK ({#} \t ) ])/',
@@ -410,7 +409,6 @@ my @death_utf8 = mark_as_utf8(
'/ネ(?[[[:ネ:]]])ネ/' => "POSIX class [:ネ:] unknown {#} m/ネ(?[[[:ネ:]{#}]])ネ/",
'/ネ(?[[:ネ:]])ネ/' => "POSIX class [:ネ:] unknown {#} m/ネ(?[[:ネ:]{#}])ネ/",
'/ネ(?[ネ])ネ/' => 'Unexpected character {#} m/ネ(?[ネ{#}])ネ/',
- '/ネ(?[ネ])/l' => '(?[...]) not valid in locale {#} m/ネ(?[{#}ネ])/',
'/ネ(?[ + [ネ] ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/ネ(?[ +{#} [ネ] ])/',
'/ネ(?[ \cK - ( + [ネ] ) ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/ネ(?[ \cK - ( +{#} [ネ] ) ])/',
'/ネ(?[ \cK ( [ネ] ) ])/' => 'Unexpected \'(\' with no preceding operator {#} m/ネ(?[ \cK ({#} [ネ] ) ])/',
diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t
index 48a4f00b8e..c85fde6f2d 100644
--- a/t/re/regex_sets.t
+++ b/t/re/regex_sets.t
@@ -9,7 +9,8 @@ BEGIN {
chdir 't' if -d 't';
@INC = ('../lib','.','../ext/re');
require './test.pl';
- require './test.pl'; require './charset_tools.pl';
+ require './charset_tools.pl';
+ require './loc_tools.pl';
skip_all_without_unicode_tables();
}
@@ -96,6 +97,44 @@ like("k", $still_fold, "/i on interpolated (?[ ]) is retained in outer without /
eval 'my $x = qr/(?[ [a] ])/; qr/(?[ $x ])/';
is($@, "", 'qr/(?[ [a] ])/ can be interpolated');
+if (! is_miniperl() && locales_enabled('LC_CTYPE')) {
+ my $utf8_locale = find_utf8_ctype_locale;
+ SKIP: {
+ skip("No utf8 locale available on this platform", 8) unless $utf8_locale;
+
+ setlocale(&POSIX::LC_ALL, "C");
+ use locale;
+
+ $kelvin_fold = qr/(?[ \N{KELVIN SIGN} ])/i;
+ my $single_char_class = qr/(?[ \: ])/;
+
+ setlocale(&POSIX::LC_ALL, $utf8_locale);
+
+ like("\N{KELVIN SIGN}", $kelvin_fold,
+ '(?[ \N{KELVIN SIGN} ]) matches itself under /i in UTF8-locale');
+ like("K", $kelvin_fold,
+ '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in UTF8-locale');
+ like("k", $kelvin_fold,
+ '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in UTF8-locale');
+ like(":", $single_char_class,
+ '(?[ : ]) matches itself in UTF8-locale (a single character class)');
+
+ setlocale(&POSIX::LC_ALL, "C");
+
+ # These should generate warnings (the above 4 shouldn't), but like()
+ # suppresses them, so the warnings tests are in t/lib/warnings/regexec
+ like("\N{KELVIN SIGN}", $kelvin_fold,
+ '(?[ \N{KELVIN SIGN} ]) matches itself under /i in C locale');
+ like("K", $kelvin_fold,
+ '(?[ \N{KELVIN SIGN} ]) matches "K" under /i in C locale');
+ like("k", $kelvin_fold,
+ '(?[ \N{KELVIN SIGN} ]) matches "k" under /i in C locale');
+ like(":", $single_char_class,
+ '(?[ : ]) matches itself in C locale (a single character class)');
+ }
+}
+
+
done_testing();
1;