regcomp.c: Fix \N{} multi-char fold buffer boundary bug

An earlier commit in this topic branch fixed the bug (for non-\N{} cases) in which a multi-character fold could try to span two EXACTFish nodes that had been split because the first one would otherwise contain too long a string. This commit extends that fix to characters entered via \N{...}. It does so by splitting the \N handling: if the \N resolves to a single code point, it now goes through the normal processing, and therefore no longer bypasses the code that was added in the earlier commit.
author: Karl Williamson <public@khwilliamson.com> 2012-08-01 15:12:23 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-08-02 09:24:53 -0600
commit: b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97 (patch)
tree: bf292382966c434a5e0564569d0e5860c2d40924 /t/re
parent: b9e8c997b9a1c15e919ad20df9ba2f1501c64d76 (diff)
download: perl-b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97.tar.gz
2 files changed, 23 insertions, 2 deletions
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index a3540fd185..95f904fea1 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2083,6 +2083,27 @@ EOP
         }
         ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
 
+        $failed = 0;
+        for my $repeat (1 .. 300) {
+            my $string = $single x $repeat;
+            my $lhs = $string . "\N{LATIN SMALL LIGATURE FFI}";
+            if ($lhs !~ m/${string}ff\N{LATIN SMALL LETTER I}/i) {
+                $failed = $repeat;
+                last;
+            }
+        }
+        ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
+
+        $failed = 0;
+        for my $repeat (1 .. 300) {
+            my $string = $single x $repeat;
+            my $lhs = $string . "\N{LATIN SMALL LIGATURE FFL}";
+            if ($lhs !~ m/${string}ff\N{U+6c}/i) {
+                $failed = $repeat;
+                last;
+            }
+        }
+        ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed");
     }
 
     #
diff --git a/t/re/re_tests b/t/re/re_tests
index 9fa374e700..3fdaf80ed3 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1443,7 +1443,7 @@ abc\N	abc\n	n
 # figures it out.
 \N{U+}	-	c	-	Invalid hexadecimal number
 [\N{U+}]	-	c	-	Invalid hexadecimal number
-\N{U+4AG3}	-	c	-	Illegal hexadecimal digit
+\N{U+4AG3}	-	c	-	Invalid hexadecimal number
 [\N{U+4AG3}]	-	c	-	Invalid hexadecimal number
 abc\N{def	-	c	-	\\N{NAME} must be resolved by the lexer
 
@@ -1457,7 +1457,7 @@ abc\N{def	-	c	-	\\N{NAME} must be resolved by the lexer
 
 # Verify works in single quotish context; regex compiler delivers slightly different msg
 # \N{U+BEEF.BEAD} succeeds here, because can't completely hide it from the outside.
-\N{U+0xBEEF}	-	c	-	Illegal hexadecimal digit
+\N{U+0xBEEF}	-	c	-	Invalid hexadecimal number
 \c`	-	c	-	\"\\c`\" is more clearly written simply as \"\\ \"
 \c1	-	c	-	\"\\c1\" is more clearly written simply as \"q\"
 \cA	\001	y	$&	\1
author	Karl Williamson <public@khwilliamson.com>	2012-08-01 15:12:23 -0600
committer	Karl Williamson <public@khwilliamson.com>	2012-08-02 09:24:53 -0600
commit	b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97 (patch)
tree	bf292382966c434a5e0564569d0e5860c2d40924 /t/re
parent	b9e8c997b9a1c15e919ad20df9ba2f1501c64d76 (diff)
download	perl-b9c48b5b74957012d9ed4b6e6cc2e067f5af2f97.tar.gz