summary refs log tree commit diff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2003-09-10 09:31:24 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2003-09-10 09:31:24 +0000
commit a79135933e1df731ba243e532123f9956085f1b3 (patch)
tree c3bf93dbfce001e2dc4e5b964e7ab16b1ef031f8
parent 2d79bf7f1e821d4cc07e4959f825479a7c0ab102 (diff)
download perl-a79135933e1df731ba243e532123f9956085f1b3.tar.gz
[perl #23769] Unicode regex broken on simple example
regrepeat() did not work right for UTF-8(ed Latin-1) in the EXACT case, which made the \x{a0}+ fail.
p4raw-id: //depot/perl@21158
-rw-r--r-- regexec.c 14
-rwxr-xr-x t/op/pat.t 15
2 files changed, 23 insertions, 6 deletions
diff --git a/regexec.c b/regexec.c
index 464ceaf9e3..d2e9c66358 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4065,10 +4065,16 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
case CANY:
scan = loceol;
break;
- case EXACT: /* length of string is 1 */
- c = (U8)*STRING(p);
- while (scan < loceol && UCHARAT(scan) == c)
- scan++;
+ case EXACT:
+ if (do_utf8) {
+ c = (U8)*STRING(p);
+ while (scan < loceol && utf8_to_uvuni((U8*)scan, 0) == c)
+ scan += UTF8SKIP(scan);
+ } else { /* length of string is 1 */
+ c = (U8)*STRING(p);
+ while (scan < loceol && UCHARAT(scan) == c)
+ scan++;
+ }
break;
case EXACTF: /* length of string is 1 */
c = (U8)*STRING(p);
diff --git a/t/op/pat.t b/t/op/pat.t
index 27262bdcfb..54f67fc476 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..1012\n";
+print "1..1015\n";
BEGIN {
chdir 't' if -d 't';
@@ -3212,5 +3212,16 @@ ok(" \x{10428}" =~ qr/\x{10400}/i,
ok(" \x{1E01}x" =~ qr/\x{1E00}X/i,
"<20030808193656.5109.1@llama.ni-s.u-net.com>");
-# last test 1012
+{
+ # [perl #23769] Unicode regex broken on simple example
+ # regrepeat() didn't handle UTF-8 EXACT case right.
+
+ my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
+
+ ok($s =~ /\x{a0}/, "[perl #23769]");
+ ok($s =~ /\x{a0}+/, "[perl #23769]");
+ ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]");
+}
+
+# last test 1015