diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-08-01 15:12:23 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-08-02 09:24:53 -0600 |
commit | b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97 (patch) | |
tree | bf292382966c434a5e0564569d0e5860c2d40924 /t/re | |
parent | b9e8c997b9a1c15e919ad20df9ba2f1501c64d76 (diff) | |
download | perl-b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97.tar.gz |
regcomp.c: Fix \N{} multi-char fold buffer boundary bug
An earlier commit in this topic branch fixed the bug (for non-\N{} cases)
where a multi-character fold could try to span two EXACTFish nodes that
had been split because the first one would otherwise contain too long a
string.
This commit extends that fix to include characters entered via \N{...}.
It does this by splitting the \N handling: if the \N resolves to a single
code point, it now goes through the normal processing, and therefore no
longer bypasses the code that was added in the earlier commit.
Diffstat (limited to 't/re')
-rw-r--r-- | t/re/pat_advanced.t | 21 | ||||
-rw-r--r-- | t/re/re_tests | 4 |
2 files changed, 23 insertions, 2 deletions
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index a3540fd185..95f904fea1 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2083,6 +2083,27 @@ EOP
         }
         ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
 
+        $failed = 0;
+        for my $repeat (1 .. 300) {
+            my $string = $single x $repeat;
+            my $lhs = $string . "\N{LATIN SMALL LIGATURE FFI}";
+            if ($lhs !~ m/${string}ff\N{LATIN SMALL LETTER I}/i) {
+                $failed = $repeat;
+                last;
+            }
+        }
+        ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
+
+        $failed = 0;
+        for my $repeat (1 .. 300) {
+            my $string = $single x $repeat;
+            my $lhs = $string . "\N{LATIN SMALL LIGATURE FFL}";
+            if ($lhs !~ m/${string}ff\N{U+6c}/i) {
+                $failed = $repeat;
+                last;
+            }
+        }
+        ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
     }
 
     #
diff --git a/t/re/re_tests b/t/re/re_tests
index 9fa374e700..3fdaf80ed3 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1443,7 +1443,7 @@ abc\N abc\n n
 # figures it out.
 \N{U+} - c - Invalid hexadecimal number
 [\N{U+}] - c - Invalid hexadecimal number
-\N{U+4AG3} - c - Illegal hexadecimal digit
+\N{U+4AG3} - c - Invalid hexadecimal number
 [\N{U+4AG3}] - c - Invalid hexadecimal number
 abc\N{def - c - \\N{NAME} must be resolved by the lexer
 
@@ -1457,7 +1457,7 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
 
 # Verify works in single quotish context; regex compiler delivers slightly different msg
 # \N{U+BEEF.BEAD} succeeds here, because can't completely hide it from the outside.
-\N{U+0xBEEF} - c - Illegal hexadecimal digit
+\N{U+0xBEEF} - c - Invalid hexadecimal number
 \c` - c - \"\\c`\" is more clearly written simply as \"\\ \"
 \c1 - c - \"\\c1\" is more clearly written simply as \"q\"
 \cA \001 y $& \1