diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-01-01 17:29:05 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-01-01 17:29:05 +0000 |
commit | c7254714b14d9ee5485ea0c63c897c9eb3c2c21e (patch) | |
tree | 8a2f3646da3dba2fd41ae5957ddec514f27d10ea | |
parent | 4f048c5d37e8541475c2e2ea1b688cb6fc847914 (diff) | |
download | perl-c7254714b14d9ee5485ea0c63c897c9eb3c2c21e.tar.gz |
Better support for multicharacter foldings.
Now all but two of the CaseFold.txt cases work-- but
only when the target string is single-character, more
debugging needed.
p4raw-id: //depot/perl@14001
-rw-r--r-- | regexec.c | 50 |
1 files changed, 28 insertions, 22 deletions
@@ -960,7 +960,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta c1 = *(U8*)m; c2 = PL_fold_locale[c1]; do_exactf: - e = strend - ln; + e = do_utf8 ? s + ln - 1 : strend - ln; if (norun && e < s) e = s; /* Due to minlen logic of intuit() */ @@ -2406,31 +2406,37 @@ S_regmatch(pTHX_ regnode *prog) if (do_utf8 && UTF) { /* Both the target and the pattern are utf8. */ - while (s < e) { - if (l >= PL_regeol) - sayNO; - if (UTF8SKIP(s) != UTF8SKIP(l) || - memNE(s, (char*)l, UTF8SKIP(s))) { - U8 lfoldbuf[UTF8_MAXLEN_FOLD+1]; - STRLEN lfoldlen; + U8 lfoldbuf[UTF8_MAXLEN_FOLD+1], *lf; + U8 sfoldbuf[UTF8_MAXLEN_FOLD+1], *sf; + STRLEN lfoldlen, sfoldlen; + STRLEN llen = 0; + STRLEN slen = 0; - /* Try one of them folded. */ + while (s < e) { + /* Fold them and walk them characterwise. */ + if (llen == 0) { to_utf8_fold((U8*)l, lfoldbuf, &lfoldlen); - if (UTF8SKIP(s) != lfoldlen || - memNE(s, (char*)lfoldbuf, lfoldlen)) { - U8 sfoldbuf[UTF8_MAXLEN_FOLD+1]; - STRLEN sfoldlen; - - /* Try both of them folded. */ - - to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen); - if (sfoldlen != lfoldlen || - memNE((char*)sfoldbuf, - (char*)lfoldbuf, lfoldlen)) - sayNO; - } + lf = lfoldbuf; + llen = lfoldlen; + } + + if (slen == 0) { + to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen); + sf = sfoldbuf; + slen = sfoldlen; + } + + while (llen && slen) { + if (UTF8SKIP(lf) != UTF8SKIP(sf) || + memNE((char*)lf, (char*)sf, UTF8SKIP(lf))) + sayNO; + llen -= UTF8SKIP(lf); + lf += UTF8SKIP(lf); + slen -= UTF8SKIP(sf); + sf += UTF8SKIP(sf); } + l += UTF8SKIP(l); s += UTF8SKIP(s); } |