diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2003-09-10 09:31:24 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-09-10 09:31:24 +0000 |
commit | a79135933e1df731ba243e532123f9956085f1b3 (patch) | |
tree | c3bf93dbfce001e2dc4e5b964e7ab16b1ef031f8 | |
parent | 2d79bf7f1e821d4cc07e4959f825479a7c0ab102 (diff) | |
download | perl-a79135933e1df731ba243e532123f9956085f1b3.tar.gz |
[perl #23769] Unicode regex broken on simple example
regrepeat() did not work right for UTF-8(ed Latin-1)
in the EXACT case, which made the \x{a0}+ fail.
p4raw-id: //depot/perl@21158
-rw-r--r-- | regexec.c | 14 | ||||
-rwxr-xr-x | t/op/pat.t | 15 |
2 files changed, 23 insertions, 6 deletions
```diff
@@ -4065,10 +4065,16 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     case CANY:
 	scan = loceol;
 	break;
-    case EXACT: /* length of string is 1 */
-	c = (U8)*STRING(p);
-	while (scan < loceol && UCHARAT(scan) == c)
-	    scan++;
+    case EXACT:
+	if (do_utf8) {
+	    c = (U8)*STRING(p);
+	    while (scan < loceol && utf8_to_uvuni((U8*)scan, 0) == c)
+		scan += UTF8SKIP(scan);
+	} else { /* length of string is 1 */
+	    c = (U8)*STRING(p);
+	    while (scan < loceol && UCHARAT(scan) == c)
+		scan++;
+	}
 	break;
     case EXACTF: /* length of string is 1 */
 	c = (U8)*STRING(p);
diff --git a/t/op/pat.t b/t/op/pat.t
index 27262bdcfb..54f67fc476 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1012\n";
+print "1..1015\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3212,5 +3212,16 @@ ok(" \x{10428}" =~ qr/\x{10400}/i,
 
 ok(" \x{1E01}x" =~ qr/\x{1E00}X/i,
    "<20030808193656.5109.1@llama.ni-s.u-net.com>");
 
-# last test 1012
+{
+    # [perl #23769] Unicode regex broken on simple example
+    # regrepeat() didn't handle UTF-8 EXACT case right.
+
+    my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
+
+    ok($s =~ /\x{a0}/, "[perl #23769]");
+    ok($s =~ /\x{a0}+/, "[perl #23769]");
+    ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]");
+}
+
+# last test 1015
```