From 32e6a07c84b153f78f946de50870bc0ee030624f Mon Sep 17 00:00:00 2001
From: Yves Orton
Date: Tue, 24 Apr 2007 18:46:05 +0200
Subject: Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.

Message-ID: <9b18b3110704240746u461e4bdcl208ef7d7f9c5ef64@mail.gmail.com>
p4raw-id: //depot/perl@31081
---
 regexec.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'regexec.c')

diff --git a/regexec.c b/regexec.c
index d84190b0d6..374d480be7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -5004,7 +5004,34 @@ NULL
             sayNO;
             /* NOTREACHED */
 #undef ST
 
+        case FOLDCHAR:
+            n = ARG(scan);
+            if (nextchr==n) {
+                locinput += UTF8SKIP(locinput);
+            } else {
+                /* This malarky is to handle LATIN SMALL LETTER SHARP S
+                   properly. Sigh */
+                if (0xDF==n && (UTF||do_utf8) &&
+                    toLOWER(locinput[0])=='s' && toLOWER(locinput[1])=='s')
+                {
+                    locinput += 2;
+                } else if (do_utf8) {
+                    U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
+                    STRLEN tmplen1;
+                    U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
+                    STRLEN tmplen2;
+                    to_uni_fold(n, tmpbuf1, &tmplen1);
+                    to_utf8_fold(locinput, tmpbuf2, &tmplen2);
+                    if (tmplen1!=tmplen2 || !strnEQ(tmpbuf1,tmpbuf2,tmplen1))
+                        sayNO;
+                    else
+                        locinput += UTF8SKIP(locinput);
+                } else
+                    sayNO;
+            }
+            nextchr = UCHARAT(locinput);
+            break;
         case LNBREAK:
             if ((n=is_LNBREAK(locinput,do_utf8))) {
                 locinput += n;
-- 
cgit v1.2.1