From a79135933e1df731ba243e532123f9956085f1b3 Mon Sep 17 00:00:00 2001
From: Jarkko Hietaniemi
Date: Wed, 10 Sep 2003 09:31:24 +0000
Subject: [perl #23769] Unicode regex broken on simple example

regrepeat() did not work right for UTF-8(ed Latin-1)
in the EXACT case, which made the \x{a0}+ fail.

p4raw-id: //depot/perl@21158
---
 regexec.c  | 14 ++++++++++----
 t/op/pat.t | 15 +++++++++++++--
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/regexec.c b/regexec.c
index 464ceaf9e3..d2e9c66358 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4065,10 +4065,16 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     case CANY:
         scan = loceol;
         break;
-    case EXACT: /* length of string is 1 */
-        c = (U8)*STRING(p);
-        while (scan < loceol && UCHARAT(scan) == c)
-            scan++;
+    case EXACT:
+        if (do_utf8) {
+            c = (U8)*STRING(p);
+            while (scan < loceol && utf8_to_uvuni((U8*)scan, 0) == c)
+                scan += UTF8SKIP(scan);
+        } else { /* length of string is 1 */
+            c = (U8)*STRING(p);
+            while (scan < loceol && UCHARAT(scan) == c)
+                scan++;
+        }
         break;
     case EXACTF: /* length of string is 1 */
         c = (U8)*STRING(p);
diff --git a/t/op/pat.t b/t/op/pat.t
index 27262bdcfb..54f67fc476 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1012\n";
+print "1..1015\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3212,5 +3212,16 @@ ok(" \x{10428}" =~ qr/\x{10400}/i,
 ok(" \x{1E01}x" =~ qr/\x{1E00}X/i,
    "<20030808193656.5109.1@llama.ni-s.u-net.com>");
 
-# last test 1012
+{
+    # [perl #23769] Unicode regex broken on simple example
+    # regrepeat() didn't handle UTF-8 EXACT case right.
+
+    my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
+
+    ok($s =~ /\x{a0}/, "[perl #23769]");
+    ok($s =~ /\x{a0}+/, "[perl #23769]");
+    ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]");
+}
+
+# last test 1015
 
-- 
cgit v1.2.1