summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2007-12-17 16:21:46 +0100
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2007-12-17 16:01:32 +0000
commit: a0a388a13daad79062b3c4b126f22d2f46fc82dd (patch)
tree: c1f1562353529ecc4edfee3469657379d1adf79d /regcomp.c
parent: 5cf57fc61e657066a9798cca42453d9e2f71bd8c (diff)
download: perl-a0a388a13daad79062b3c4b126f22d2f46fc82dd.tar.gz
Fix various bugs in regex engine with mixed utf8/latin pattern and strings. Related to [perl #36207] among others
Message-ID: <9b18b3110712170621h41de2c76k331971e3660abcb0@mail.gmail.com> p4raw-id: //depot/perl@32628
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 27
1 files changed, 23 insertions, 4 deletions
diff --git a/regcomp.c b/regcomp.c
index 19d6f15985..5a175ba807 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2791,7 +2791,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
last = cur;
}
} else {
- if ( last ) {
+/*
+ Currently we assume that the trie can handle both unicode and ascii
+ case-folded matches. If this proves false then the following
+ define will prevent tries in this situation.
+
+ #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
+*/
+#define TRIE_TYPE_IS_SAFE 1
+ if ( last && TRIE_TYPE_IS_SAFE ) {
make_trie( pRExC_state,
startbranch, first, cur, tail, count,
optype, depth+1 );
@@ -2819,7 +2827,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
"", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
});
- if ( last ) {
+
+ if ( last && TRIE_TYPE_IS_SAFE ) {
made= make_trie( pRExC_state, startbranch, first, scan, tail, count, optype, depth+1 );
#ifdef TRIE_STUDY_OPT
if ( ((made == MADE_EXACT_TRIE &&
@@ -6867,6 +6876,7 @@ tryagain:
case 0xDF:
case 0xC3:
case 0xCE:
+ do_foldchar:
if (!LOC && FOLD) {
U32 len,cp;
len=0; /* silence a spurious compiler warning */
@@ -6893,7 +6903,11 @@ tryagain:
required, as the default for this switch is to jump to the
literal text handling code.
*/
- switch (*++RExC_parse) {
+ switch ((U8)*++RExC_parse) {
+ case 0xDF:
+ case 0xC3:
+ case 0xCE:
+ goto do_foldchar;
/* Special Escapes */
case 'A':
RExC_seen_zerolen++;
@@ -7211,8 +7225,13 @@ tryagain:
an unescaped equivalent literal.
*/
- switch (*++p) {
+ switch ((U8)*++p) {
/* These are all the special escapes. */
+ case 0xDF:
+ case 0xC3:
+ case 0xCE:
+ if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
+ goto normal_default;
case 'A': /* Start assertion */
case 'b': case 'B': /* Word-boundary assertion*/
case 'C': /* Single char !DANGEROUS! */