diff options
author | Karl Williamson <khw@cpan.org> | 2015-02-17 15:03:32 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-02-19 22:55:01 -0700 |
commit | 64935bc6975bb01af403817752e88d6540c8711d (patch) | |
tree | 6a1619ac46d1501a5b296a2b69d9af0b98db8c58 /lib | |
parent | 0e0b935601a8b7a2c56653412a94a36f986bc34f (diff) | |
download | perl-64935bc6975bb01af403817752e88d6540c8711d.tar.gz |
Add qr/\b{gcb}/
A function implements seeing if the space between any two characters is
a grapheme cluster break. Afer I wrote this, I realized that an array
lookup might be a better implementation, but the deadline for v5.22 was
too close to change it. I did see that my gcc optimized it down to
an array lookup.
This makes the implementation of \X go from being complicated to
trivial.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicore/mktables | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 2da7bb3416..511ad020ee 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -18762,6 +18762,7 @@ sub _test_break($$) { my @should_match = map { eval "\"$_\"" } @should_display; # If a string can be represented in both non-ut8 and utf8, test both cases + my $display_upgrade = ""; UPGRADE: for my $to_upgrade (0 .. 1) { @@ -18771,8 +18772,54 @@ sub _test_break($$) { next UPGRADE if utf8::is_utf8($string); utf8::upgrade($string); + $display_upgrade = " (utf8-upgraded)"; + } + + # The /l modifier has C after it to indicate the locale to try + my @modifiers = qw(a aa d lC u i); + push @modifiers, "l$utf8_locale" if defined $utf8_locale; + + # Test for each of the regex modifiers. + for my $modifier (@modifiers) { + my $display_locale = ""; + + # For /l, set the locale to what it says to. + if ($modifier =~ / ^ l (.*) /x) { + my $locale = $1; + $display_locale = "(locale = $locale)"; + use Config; + if (defined $Config{d_setlocale}) { + eval { require POSIX; import POSIX 'locale_h'; }; + if (defined &POSIX::LC_CTYPE) { + POSIX::setlocale(&POSIX::LC_CTYPE, $locale); + } + } + $modifier = 'l'; + } + + no warnings qw(locale regexp surrogate); + my $pattern = "(?$modifier:$break_pattern)"; + + # Actually do the test + my $matched = $string =~ qr/$pattern/; + print "not " unless $matched; + + # Fancy display of test results + $matched = ($matched) ? "matched" : "failed to match"; + print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale\n"; + + # Repeat with the first \B{} in the pattern. This makes sure the + # code in regexec.c:find_byclass() for \B gets executed + if ($pattern =~ / ( .*? : ) .* ( \\B\{ .* ) /x) { + my $B_pattern = "$1$2"; + $matched = $string =~ qr/$B_pattern/; + print "not " unless $matched; + print "ok ", ++$Tests, " - \"$display_string\" $matched /$B_pattern/$display_upgrade; line $line $display_locale\n"; + } } + next if $break_type ne 'gcb'; + # Finally, do the \X match. my @matches = $string =~ /(\X)/g; |