diff options
author | Yves Orton <demerphq@gmail.com> | 2007-04-27 18:09:56 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-04-27 14:19:15 +0000 |
commit | e64b1bd1d100534286224b3b46db4d9c6a8cb3a9 (patch) | |
tree | 45f340af8635a5079d349ccd92b2249267e66bef /regcomp.c | |
parent | 62aa7ed050aea99a989c95f16814568cdce5b315 (diff) | |
download | perl-e64b1bd1d100534286224b3b46db4d9c6a8cb3a9.tar.gz |
Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.
Message-ID: <9b18b3110704270709y50ef652ci436b3bb29abca275@mail.gmail.com>
p4raw-id: //depot/perl@31102
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 28 |
1 files changed, 15 insertions, 13 deletions
```diff
@@ -3364,12 +3364,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
 if (flags & SCF_DO_STCLASS_AND) {
 for (value = 0; value < 256; value++)
- if (!is_LNBREAK_cp(value))
+ if (!is_VERTWS_cp(value))
 ANYOF_BITMAP_CLEAR(data->start_class, value);
 }
 else {
 for (value = 0; value < 256; value++)
- if (is_LNBREAK_cp(value))
+ if (is_VERTWS_cp(value))
 ANYOF_BITMAP_SET(data->start_class, value);
 }
 if (flags & SCF_DO_STCLASS_OR)
@@ -6575,16 +6575,18 @@ tryagain:
 case 0xDF:
 case 0xC3:
 case 0xCE:
- if (FOLD && is_TRICKYFOLD(RExC_parse,UTF)) {
- STRLEN len = UTF ? 0 : 1;
- U32 cp = UTF ? utf8_to_uvchr((U8*)RExC_parse, &len) : (U32)((U8*)RExC_parse)[0];
- *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */
- RExC_parse+=len;
- ret = reganode(pRExC_state, FOLDCHAR, cp);
- Set_Node_Length(ret, 1); /* MJD */
- } else
- goto outer_default;
- break;
+ if (FOLD) {
+ U32 len,cp;
+ if (cp = what_len_TRICKYFOLD_safe(RExC_parse,RExC_end,UTF,len)) {
+ *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */
+ RExC_parse+=len-1; /* we get one from nextchar() as well. :-( */
+ ret = reganode(pRExC_state, FOLDCHAR, cp);
+ Set_Node_Length(ret, 1); /* MJD */
+ nextchar(pRExC_state); /* kill whitespace under /x */
+ return ret;
+ }
+ }
+ goto outer_default;
 case '\\':
 /* Special Escapes
@@ -6885,7 +6887,7 @@ tryagain:
 case 0xDF:
 case 0xC3:
 case 0xCE:
- if (!FOLD || !is_TRICKYFOLD(p,UTF))
+ if (!FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
 goto normal_default;
 case '^':
 case '$':
```