summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- regcomp.c 48
-rw-r--r-- regexec.c 72
2 files changed, 89 insertions, 31 deletions
diff --git a/regcomp.c b/regcomp.c
index b061991342..cac14bf8e6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2986,7 +2986,8 @@ tryagain:
char *oldp, *s;
STRLEN numlen;
STRLEN ulen;
- U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
+ STRLEN foldlen;
+ U8 tmpbuf[UTF8_MAXLEN_UCLC+1], *foldbuf;
parse_start = RExC_parse - 1;
@@ -3130,17 +3131,28 @@ tryagain:
}
if (RExC_flags16 & PMf_EXTENDED)
p = regwhite(p, RExC_end);
- if (UTF && FOLD) {
- toFOLD_uni(ender, tmpbuf, &ulen);
- ender = utf8_to_uvchr(tmpbuf, 0);
- }
+ if (UTF && FOLD)
+ toFOLD_uni(ender, tmpbuf, &foldlen);
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
else if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen;
+ if (FOLD) {
+ for (foldbuf = tmpbuf;
+ foldlen;
+ foldlen -= numlen) {
+ ender = utf8_to_uvchr(foldbuf, &numlen);
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ foldbuf += numlen;
+ }
+ }
+ else {
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ }
}
else {
len++;
@@ -3149,9 +3161,23 @@ tryagain:
break;
}
if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen - 1;
+ if (FOLD) {
+ for (foldbuf = tmpbuf;
+ foldlen;
+ foldlen -= numlen) {
+ ender = utf8_to_uvchr(foldbuf, &numlen);
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ foldbuf += numlen;
+ }
+ }
+ else {
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ }
+ len--;
}
else
REGC(ender, s++);
diff --git a/regexec.c b/regexec.c
index c932165941..e67774dc22 100644
--- a/regexec.c
+++ b/regexec.c
@@ -979,38 +979,59 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
* Fortunately, not getting this right is allowed
* for Unicode Regular Expression Support level 1,
* only one-to-one matching is required. --jhi */
+
if (c1 == c2) {
while (s <= e) {
c = utf8_to_uvchr((U8*)s, &len);
- uvchr_to_utf8(tmpbuf, c);
- to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- f = utf8_to_uvchr(foldbuf, 0);
-
- if ( ((c == c1 && ln == len) ||
- (f == c1 && ln == foldlen) ||
- !ibcmp_utf8(s, do_utf8, (I32)(strend - s),
- m, UTF, (I32)ln))
+ if ( c == c1
+ && (ln == len ||
+ !ibcmp_utf8(s, do_utf8, strend - s,
+ m, UTF, ln))
&& (norun || regtry(prog, s)) )
goto got_it;
+ else {
+ uvchr_to_utf8(tmpbuf, c);
+ to_utf8_fold(tmpbuf, foldbuf, &foldlen);
+ f = utf8_to_uvchr(foldbuf, 0);
+ if ( f != c
+ && (f == c1 || f == c2)
+ && (ln == foldlen ||
+ !ibcmp_utf8((char *)foldbuf,
+ do_utf8, foldlen,
+ m, UTF, ln))
+ && (norun || regtry(prog, s)) )
+ goto got_it;
+ }
s += len;
}
}
else {
while (s <= e) {
c = utf8_to_uvchr((U8*)s, &len);
- uvchr_to_utf8(tmpbuf, c);
- to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- f = utf8_to_uvchr(foldbuf, 0);
-
+
if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
- if ( (((c == c1 || c == c2) && ln == len) ||
- ((f == c1 || f == c2) && ln == foldlen) ||
- !ibcmp_utf8(s, do_utf8, (I32)(strend - s),
- m, UTF, (I32)ln))
+
+ if ( (c == c1 || c == c2)
+ && (ln == len ||
+ !ibcmp_utf8(s, do_utf8, strend - s,
+ m, UTF, ln))
&& (norun || regtry(prog, s)) )
goto got_it;
+ else {
+ uvchr_to_utf8(tmpbuf, c);
+ to_utf8_fold(tmpbuf, foldbuf, &foldlen);
+ f = utf8_to_uvchr(foldbuf, 0);
+ if ( f != c
+ && (f == c1 || f == c2)
+ && (ln == foldlen ||
+ !ibcmp_utf8((char *)foldbuf,
+ do_utf8, foldlen,
+ m, UTF, ln))
+ && (norun || regtry(prog, s)) )
+ goto got_it;
+ }
s += len;
}
}
@@ -2372,10 +2393,21 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
if (UTF8SKIP(s) != UTF8SKIP(l) ||
memNE(s, (char*)l, UTF8SKIP(s))) {
- to_utf8_fold((U8*)l, tmpbuf, &ulen);
- if (UTF8SKIP(s) != ulen ||
- memNE(s, (char*)tmpbuf, ulen))
- sayNO;
+ U8 lfoldbuf[UTF8_MAXLEN_FOLD+1];
+ STRLEN lfoldlen;
+
+ to_utf8_fold((U8*)l, lfoldbuf, &lfoldlen);
+ if (UTF8SKIP(s) != lfoldlen ||
+ memNE(s, (char*)lfoldbuf, lfoldlen)) {
+ U8 sfoldbuf[UTF8_MAXLEN_FOLD+1];
+ STRLEN sfoldlen;
+
+ to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen);
+ if (sfoldlen != lfoldlen ||
+ memNE((char*)sfoldbuf,
+ (char*)lfoldbuf, lfoldlen))
+ sayNO;
+ }
}
l += UTF8SKIP(l);
s += UTF8SKIP(s);