summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Slaven Rezic <slaven@rezic.de> 2009-01-04 17:28:33 +0100
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2009-01-04 17:28:33 +0100
commit: c012444fd89eef64e1d1687642cdb9f968e96739 (patch)
tree: 510bdf3a51b186fe83a4d24ea15b27ca401455b9
parent: bd2db5df3cd7c8f0ecc592ef15151e17c1504af9 (diff)
download: perl-c012444fd89eef64e1d1687642cdb9f968e96739.tar.gz
Another regexp failure with utf8-flagged string and byte-flagged pattern (reminder)
Date: 17 Nov 2007 16:29:29 +0100 Message-ID: <87r6iohova.fsf@biokovo-amd64.herceg.de>
-rw-r--r-- regexec.c 8
-rwxr-xr-x t/op/pat.t 11
2 files changed, 16 insertions, 3 deletions
diff --git a/regexec.c b/regexec.c
index 94d67618fb..bc8da6e24b 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1007,15 +1007,16 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
+ UV uvc_unfolded = 0; \
switch (trie_type) { \
case trie_utf8_fold: \
if ( foldlen>0 ) { \
- uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
foldlen -= len; \
uscan += len; \
len=0; \
} else { \
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
foldlen -= UNISKIP( uvc ); \
uscan = foldbuf + UNISKIP( uvc ); \
@@ -1054,6 +1055,9 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
charid = (U16)SvIV(*svpp); \
} \
} \
+ if (!charid && trie_type == trie_utf8_fold && !UTF) { \
+ charid = trie->charmap[uvc_unfolded]; \
+ } \
} STMT_END
#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
diff --git a/t/op/pat.t b/t/op/pat.t
index aa275bd4f4..586b31788f 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -13,7 +13,7 @@ sub run_tests;
$| = 1;
-my $EXPECTED_TESTS = 3865; # Update this when adding/deleting tests.
+my $EXPECTED_TESTS = 3961; # Update this when adding/deleting tests.
BEGIN {
chdir 't' if -d 't';
@@ -3896,6 +3896,15 @@ sub run_tests {
iseq $1, "\xd6", "Upgrade error";
}
+ {
+# more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding
+ for my $chr (160 .. 255) {
+ my $chr_byte = chr($chr);
+ my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8);
+ my $rx = qr{$chr_byte|X}i;
+ ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr");
+ }
+ }
{
# Regardless of utf8ness any character matches itself when