summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2007-04-24 18:46:05 +0200
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2007-04-26 10:23:30 +0000
commit: 32e6a07c84b153f78f946de50870bc0ee030624f (patch)
tree: 80c02123a3e592ec2c9397c178cd62e38136d870 /regexec.c
parent: 0f68039566ac464bc1d4ff8f5b574153a1f6e9e9 (diff)
download: perl-32e6a07c84b153f78f946de50870bc0ee030624f.tar.gz
Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.
Message-ID: <9b18b3110704240746u461e4bdcl208ef7d7f9c5ef64@mail.gmail.com>
p4raw-id: //depot/perl@31081
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 27
1 files changed, 27 insertions, 0 deletions
diff --git a/regexec.c b/regexec.c
index d84190b0d6..374d480be7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -5004,7 +5004,34 @@ NULL
sayNO;
/* NOTREACHED */
#undef ST
+ case FOLDCHAR:
+ n = ARG(scan);
+ if (nextchr==n) {
+ locinput += UTF8SKIP(locinput);
+ } else {
+ /* This malarky is to handle LATIN SMALL LETTER SHARP S
+ properly. Sigh */
+ if (0xDF==n && (UTF||do_utf8) &&
+ toLOWER(locinput[0])=='s' && toLOWER(locinput[1])=='s')
+ {
+ locinput += 2;
+ } else if (do_utf8) {
+ U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
+ STRLEN tmplen1;
+ U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
+ STRLEN tmplen2;
+ to_uni_fold(n, tmpbuf1, &tmplen1);
+ to_utf8_fold(locinput, tmpbuf2, &tmplen2);
+ if (tmplen1!=tmplen2 || !strnEQ(tmpbuf1,tmpbuf2,tmplen1))
+ sayNO;
+ else
+ locinput += UTF8SKIP(locinput);
+ } else
+ sayNO;
+ }
+ nextchr = UCHARAT(locinput);
+ break;
case LNBREAK:
if ((n=is_LNBREAK(locinput,do_utf8))) {
locinput += n;