diff options
author | Slaven Rezic <slaven@rezic.de> | 2009-01-04 17:28:33 +0100 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2009-01-04 17:28:33 +0100 |
commit | c012444fd89eef64e1d1687642cdb9f968e96739 (patch) | |
tree | 510bdf3a51b186fe83a4d24ea15b27ca401455b9 | |
parent | bd2db5df3cd7c8f0ecc592ef15151e17c1504af9 (diff) | |
download | perl-c012444fd89eef64e1d1687642cdb9f968e96739.tar.gz |
Another regexp failure with utf8-flagged string and byte-flagged pattern (reminder)
Date: 17 Nov 2007 16:29:29 +0100
Message-ID: <87r6iohova.fsf@biokovo-amd64.herceg.de>
-rw-r--r-- | regexec.c | 8 | ||||
-rwxr-xr-x | t/op/pat.t | 11 |
2 files changed, 16 insertions, 3 deletions
@@ -1007,15 +1007,16 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,

 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
 uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
+    UV uvc_unfolded = 0; \
     switch (trie_type) { \
     case trie_utf8_fold: \
         if ( foldlen>0 ) { \
-            uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
             foldlen -= len; \
             uscan += len; \
             len=0; \
         } else { \
-            uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+            uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
             uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
             foldlen -= UNISKIP( uvc ); \
             uscan = foldbuf + UNISKIP( uvc ); \
@@ -1054,6 +1055,9 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
             charid = (U16)SvIV(*svpp); \
         } \
     } \
+    if (!charid && trie_type == trie_utf8_fold && !UTF) { \
+        charid = trie->charmap[uvc_unfolded]; \
+    } \
 } STMT_END

 #define REXEC_FBC_EXACTISH_CHECK(CoNd) \
diff --git a/t/op/pat.t b/t/op/pat.t
index aa275bd4f4..586b31788f 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -13,7 +13,7 @@ sub run_tests;

 $| = 1;

-my $EXPECTED_TESTS = 3865; # Update this when adding/deleting tests.
+my $EXPECTED_TESTS = 3961; # Update this when adding/deleting tests.

 BEGIN {
     chdir 't' if -d 't';
@@ -3896,6 +3896,15 @@ sub run_tests {
         iseq $1, "\xd6", "Upgrade error";
     }

+    {
+# more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding
+        for my $chr (160 .. 255) {
+            my $chr_byte = chr($chr);
+            my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8);
+            my $rx = qr{$chr_byte|X}i;
+            ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr");
+        }
+    }

     {
         # Regardless of utf8ness any character matches itself when