summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2002-01-01 17:29:05 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2002-01-01 17:29:05 +0000
commit 29d6084b687161adff78f09743f72587598e21f4 (patch)
tree 8a2f3646da3dba2fd41ae5957ddec514f27d10ea /regexec.c
parent 7ab11b42f9c80d5ab8f65a68f4ae50e1999b437e (diff)
download perl-29d6084b687161adff78f09743f72587598e21f4.tar.gz
Better support for multicharacter foldings.
Now all but two of the CaseFold.txt cases work, but only when the target string is a single character; more debugging is needed.
p4raw-id: //depot/perl@14001
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 50
1 files changed, 28 insertions, 22 deletions
diff --git a/regexec.c b/regexec.c
index 0f738d1b27..4db47290fa 100644
--- a/regexec.c
+++ b/regexec.c
@@ -960,7 +960,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
c1 = *(U8*)m;
c2 = PL_fold_locale[c1];
do_exactf:
- e = strend - ln;
+ e = do_utf8 ? s + ln - 1 : strend - ln;
if (norun && e < s)
e = s; /* Due to minlen logic of intuit() */
@@ -2406,31 +2406,37 @@ S_regmatch(pTHX_ regnode *prog)
if (do_utf8 && UTF) {
/* Both the target and the pattern are utf8. */
- while (s < e) {
- if (l >= PL_regeol)
- sayNO;
- if (UTF8SKIP(s) != UTF8SKIP(l) ||
- memNE(s, (char*)l, UTF8SKIP(s))) {
- U8 lfoldbuf[UTF8_MAXLEN_FOLD+1];
- STRLEN lfoldlen;
+ U8 lfoldbuf[UTF8_MAXLEN_FOLD+1], *lf;
+ U8 sfoldbuf[UTF8_MAXLEN_FOLD+1], *sf;
+ STRLEN lfoldlen, sfoldlen;
+ STRLEN llen = 0;
+ STRLEN slen = 0;
- /* Try one of them folded. */
+ while (s < e) {
+ /* Fold them and walk them characterwise. */
+ if (llen == 0) {
to_utf8_fold((U8*)l, lfoldbuf, &lfoldlen);
- if (UTF8SKIP(s) != lfoldlen ||
- memNE(s, (char*)lfoldbuf, lfoldlen)) {
- U8 sfoldbuf[UTF8_MAXLEN_FOLD+1];
- STRLEN sfoldlen;
-
- /* Try both of them folded. */
-
- to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen);
- if (sfoldlen != lfoldlen ||
- memNE((char*)sfoldbuf,
- (char*)lfoldbuf, lfoldlen))
- sayNO;
- }
+ lf = lfoldbuf;
+ llen = lfoldlen;
+ }
+
+ if (slen == 0) {
+ to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen);
+ sf = sfoldbuf;
+ slen = sfoldlen;
+ }
+
+ while (llen && slen) {
+ if (UTF8SKIP(lf) != UTF8SKIP(sf) ||
+ memNE((char*)lf, (char*)sf, UTF8SKIP(lf)))
+ sayNO;
+ llen -= UTF8SKIP(lf);
+ lf += UTF8SKIP(lf);
+ slen -= UTF8SKIP(sf);
+ sf += UTF8SKIP(sf);
}
+
l += UTF8SKIP(l);
s += UTF8SKIP(s);
}