summaryrefslogtreecommitdiff
path: root/t/re
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-08-01 15:12:23 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-08-02 09:24:53 -0600
commit b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97 (patch)
tree bf292382966c434a5e0564569d0e5860c2d40924 /t/re
parent b9e8c997b9a1c15e919ad20df9ba2f1501c64d76 (diff)
download perl-b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97.tar.gz
regcomp.c: Fix \N{} multi-char fold buffer boundary bug
An earlier commit in this topic branch fixed the bug (for non-\N{} cases) where a multi-character fold could try to span two EXACTFish nodes, which are split because the first one would otherwise contain too long a string. This commit extends that fix to include characters entered via \N{...}. It does this by causing \N handling to be split, so that if the \N resolves to a single code point, it goes through the normal processing, and therefore no longer bypasses the code that was added in the earlier commit.
Diffstat (limited to 't/re')
-rw-r--r-- t/re/pat_advanced.t 21
-rw-r--r-- t/re/re_tests 4
2 files changed, 23 insertions, 2 deletions
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index a3540fd185..95f904fea1 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2083,6 +2083,27 @@ EOP
}
ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
+ $failed = 0;
+ for my $repeat (1 .. 300) {
+ my $string = $single x $repeat;
+ my $lhs = $string . "\N{LATIN SMALL LIGATURE FFI}";
+ if ($lhs !~ m/${string}ff\N{LATIN SMALL LETTER I}/i) {
+ $failed = $repeat;
+ last;
+ }
+ }
+ ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
+
+ $failed = 0;
+ for my $repeat (1 .. 300) {
+ my $string = $single x $repeat;
+ my $lhs = $string . "\N{LATIN SMALL LIGATURE FFL}";
+ if ($lhs !~ m/${string}ff\N{U+6c}/i) {
+ $failed = $repeat;
+ last;
+ }
+ }
+ ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
}
#
diff --git a/t/re/re_tests b/t/re/re_tests
index 9fa374e700..3fdaf80ed3 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1443,7 +1443,7 @@ abc\N abc\n n
# figures it out.
\N{U+} - c - Invalid hexadecimal number
[\N{U+}] - c - Invalid hexadecimal number
-\N{U+4AG3} - c - Illegal hexadecimal digit
+\N{U+4AG3} - c - Invalid hexadecimal number
[\N{U+4AG3}] - c - Invalid hexadecimal number
abc\N{def - c - \\N{NAME} must be resolved by the lexer
@@ -1457,7 +1457,7 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
# Verify works in single quotish context; regex compiler delivers slightly different msg
# \N{U+BEEF.BEAD} succeeds here, because can't completely hide it from the outside.
-\N{U+0xBEEF} - c - Illegal hexadecimal digit
+\N{U+0xBEEF} - c - Invalid hexadecimal number
\c` - c - \"\\c`\" is more clearly written simply as \"\\ \"
\c1 - c - \"\\c1\" is more clearly written simply as \"q\"
\cA \001 y $& \1