From 32e6a07c84b153f78f946de50870bc0ee030624f Mon Sep 17 00:00:00 2001
From: Yves Orton <demerphq@gmail.com>
Date: Tue, 24 Apr 2007 18:46:05 +0200
Subject: Re: Analysis of problems with mixed encoding case insensitive matches
 in regex engine. Message-ID:
 <9b18b3110704240746u461e4bdcl208ef7d7f9c5ef64@mail.gmail.com>

p4raw-id: //depot/perl@31081
---
 regexec.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'regexec.c')

diff --git a/regexec.c b/regexec.c
index d84190b0d6..374d480be7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -5004,7 +5004,34 @@ NULL
             sayNO;
             /* NOTREACHED */
 #undef ST
+        case FOLDCHAR:
+            n = ARG(scan);
+            if (nextchr==n) {
+                locinput += UTF8SKIP(locinput);
 
+            } else {
+                /* Special case: LATIN SMALL LETTER SHARP S (U+00DF) also
+                   matches the two-character sequence "ss", in either case. */
+                if (0xDF==n && (UTF||do_utf8) &&  
+                    toLOWER(locinput[0])=='s' && toLOWER(locinput[1])=='s') 
+                {
+                    locinput += 2;
+                } else if (do_utf8) {
+                    U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
+                    STRLEN tmplen1;
+                    U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
+                    STRLEN tmplen2;
+                    to_uni_fold(n, tmpbuf1, &tmplen1);
+                    to_utf8_fold(locinput, tmpbuf2, &tmplen2);    
+                    if (tmplen1!=tmplen2 || !strnEQ(tmpbuf1,tmpbuf2,tmplen1))
+                        sayNO;
+                    else 
+                        locinput += UTF8SKIP(locinput);
+                } else 
+                    sayNO;
+            } 
+            nextchr = UCHARAT(locinput);  
+            break;
         case LNBREAK:
             if ((n=is_LNBREAK(locinput,do_utf8))) {
                 locinput += n;
-- 
cgit v1.2.1