diff options
author | Yves Orton <demerphq@gmail.com> | 2007-04-24 18:46:05 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-04-26 10:23:30 +0000 |
commit | 32e6a07c84b153f78f946de50870bc0ee030624f (patch) | |
tree | 80c02123a3e592ec2c9397c178cd62e38136d870 /regexec.c | |
parent | 0f68039566ac464bc1d4ff8f5b574153a1f6e9e9 (diff) | |
download | perl-32e6a07c84b153f78f946de50870bc0ee030624f.tar.gz |
Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.
Message-ID: <9b18b3110704240746u461e4bdcl208ef7d7f9c5ef64@mail.gmail.com>
p4raw-id: //depot/perl@31081
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 27 |
1 files changed, 27 insertions, 0 deletions
```diff
@@ -5004,7 +5004,34 @@ NULL
             sayNO;
             /* NOTREACHED */
 #undef ST
+        case FOLDCHAR:
+            n = ARG(scan);
+            if (nextchr==n) {
+                locinput += UTF8SKIP(locinput);
+            } else {
+                /* This malarky is to handle LATIN SMALL LETTER SHARP S
+                   properly. Sigh */
+                if (0xDF==n && (UTF||do_utf8) &&
+                    toLOWER(locinput[0])=='s' && toLOWER(locinput[1])=='s')
+                {
+                    locinput += 2;
+                } else if (do_utf8) {
+                    U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
+                    STRLEN tmplen1;
+                    U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
+                    STRLEN tmplen2;
+                    to_uni_fold(n, tmpbuf1, &tmplen1);
+                    to_utf8_fold(locinput, tmpbuf2, &tmplen2);
+                    if (tmplen1!=tmplen2 || !strnEQ(tmpbuf1,tmpbuf2,tmplen1))
+                        sayNO;
+                    else
+                        locinput += UTF8SKIP(locinput);
+                } else
+                    sayNO;
+            }
+            nextchr = UCHARAT(locinput);
+            break;
         case LNBREAK:
             if ((n=is_LNBREAK(locinput,do_utf8))) {
                 locinput += n;
```