diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-01-02 03:59:22 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-01-02 03:59:22 +0000 |
commit | 5469e704aecd76ac4296e61a7da26384c20121df (patch) | |
tree | 1f0cbbbf7b3771d636c0da23f782826ff89594c5 /regexec.c | |
parent | 90e434f8137374c16423b24d56379f90ac2ff006 (diff) | |
download | perl-5469e704aecd76ac4296e61a7da26384c20121df.tar.gz |
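Make ibcmp_utf8() optionally advance through either string with no
length limit (pass (STRLEN)-1 as that string's length) and optionally
record, through a trailing pointer argument, how far the comparison got
in that string (pass 0 when the end position is not needed).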
p4raw-id: //depot/perl@14010
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 48 |
1 files changed, 16 insertions, 32 deletions
@@ -980,31 +980,25 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta U8 tmpbuf [UTF8_MAXLEN+1]; U8 foldbuf[UTF8_MAXLEN_FOLD+1]; STRLEN len, foldlen; - STRLEN mlen = utf8_length((U8*)m, (U8*)(m + ln)); - U8* l; /* The last byte of the last character in s. */ if (c1 == c2) { while (s <= e) { c = utf8_to_uvchr((U8*)s, &len); - l = utf8_hop((U8*)s, mlen); if ( c == c1 && (ln == len || - !ibcmp_utf8(s, do_utf8, - l - (U8*)s, - m, UTF, ln)) + !ibcmp_utf8(s, (STRLEN)-1, do_utf8, 0, + m, ln, UTF, 0)) && (norun || regtry(prog, s)) ) goto got_it; else { uvchr_to_utf8(tmpbuf, c); f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); - l = utf8_hop(foldbuf, mlen); if ( f != c && (f == c1 || f == c2) && (ln == foldlen || !ibcmp_utf8((char *)foldbuf, - do_utf8, - l - foldbuf, - m, UTF, ln)) + (STRLEN)-1, do_utf8, 0, + m, ln, UTF, 0)) && (norun || regtry(prog, s)) ) goto got_it; } @@ -1014,7 +1008,6 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta else { while (s <= e) { c = utf8_to_uvchr((U8*)s, &len); - l = utf8_hop((U8*)s, mlen); /* Handle some of the three Greek sigmas cases. * Note that not all the possible combinations @@ -1029,22 +1022,19 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if ( (c == c1 || c == c2) && (ln == len || - !ibcmp_utf8(s, do_utf8, - l - (U8*)s, - m, UTF, ln)) + !ibcmp_utf8(s, (STRLEN)-1, do_utf8, 0, + m, ln, UTF, 0)) && (norun || regtry(prog, s)) ) goto got_it; else { uvchr_to_utf8(tmpbuf, c); f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); - l = utf8_hop(foldbuf, mlen); if ( f != c && (f == c1 || f == c2) && (ln == foldlen || !ibcmp_utf8((char *)foldbuf, - do_utf8, - l - foldbuf, - m, UTF, ln)) + (STRLEN)-1, do_utf8, 0, + m, ln, UTF, 0)) && (norun || regtry(prog, s)) ) goto got_it; } @@ -2352,16 +2342,10 @@ S_regmatch(pTHX_ regnode *prog) if (do_utf8 || UTF) { /* Either target or the pattern are utf8. 
*/ - STRLEN slen = utf8_length((U8*)s, (U8*)e); - char *lend = (char *)utf8_hop((U8*)l, slen); - if (ibcmp_utf8(s, TRUE, e - s, - l, TRUE, lend - l)) + if (ibcmp_utf8(s, e - s, TRUE, 0, + l, (STRLEN)-1, TRUE, &l)) sayNO; - else { - l = lend; - s = e; - } locinput = l; nextchr = UCHARAT(locinput); break; @@ -4183,14 +4167,14 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) if (swash_fetch(sw, p, do_utf8)) match = TRUE; else if (flags & ANYOF_FOLD) { - STRLEN ulen; - U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; + U8 foldbuf[UTF8_MAXLEN_FOLD+1]; + STRLEN foldlen; - to_utf8_fold(p, tmpbuf, &ulen); - if (swash_fetch(sw, tmpbuf, do_utf8)) + to_utf8_fold(p, foldbuf, &foldlen); + if (swash_fetch(sw, foldbuf, do_utf8)) match = TRUE; - to_utf8_upper(p, tmpbuf, &ulen); - if (swash_fetch(sw, tmpbuf, do_utf8)) + to_utf8_upper(p, foldbuf, &foldlen); + if (swash_fetch(sw, foldbuf, do_utf8)) match = TRUE; } } |