diff options
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 68 |
1 files changed, 51 insertions, 17 deletions
@@ -999,8 +999,10 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta to_utf8_lower((U8*)m, tmpbuf1, &ulen1); to_utf8_upper((U8*)m, tmpbuf2, &ulen2); - c1 = utf8_to_uvchr(tmpbuf1, 0); - c2 = utf8_to_uvchr(tmpbuf2, 0); + c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC, + 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); + c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC, + 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); } else { c1 = *(U8*)m; @@ -1037,7 +1039,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if (c1 == c2) { while (s <= e) { - c = utf8_to_uvchr((U8*)s, &len); + c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); if ( c == c1 && (ln == len || ibcmp_utf8(s, (char **)0, 0, do_utf8, @@ -1062,7 +1066,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta } else { while (s <= e) { - c = utf8_to_uvchr((U8*)s, &len); + c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); /* Handle some of the three Greek sigmas cases. * Note that not all the possible combinations @@ -2390,7 +2396,9 @@ S_regmatch(pTHX_ regnode *prog) if (l >= PL_regeol) sayNO; if (NATIVE_TO_UNI(*(U8*)s) != - utf8_to_uvuni((U8*)l, &ulen)) + utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY)) sayNO; l += ulen; s ++; @@ -2402,7 +2410,9 @@ S_regmatch(pTHX_ regnode *prog) if (l >= PL_regeol) sayNO; if (NATIVE_TO_UNI(*((U8*)l)) != - utf8_to_uvuni((U8*)s, &ulen)) + utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY)) sayNO; s += ulen; l ++; @@ -3545,11 +3555,17 @@ S_regmatch(pTHX_ regnode *prog) to_utf8_lower((U8*)s, tmpbuf1, &ulen1); to_utf8_upper((U8*)s, tmpbuf2, &ulen2); - c1 = utf8_to_uvuni(tmpbuf1, 0); - c2 = utf8_to_uvuni(tmpbuf2, 0); + c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); + c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); } else { - c2 = c1 = utf8_to_uvchr(s, NULL); + c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); } } } @@ -3605,16 +3621,24 @@ S_regmatch(pTHX_ regnode *prog) else { STRLEN len; if (c1 == c2) { - /* count initialised to utf8_distance(old, locinput) */ + /* count initialised to + * utf8_distance(old, locinput) */ while (locinput <= e && - utf8_to_uvchr((U8*)locinput, &len) != c1) { + utf8n_to_uvchr((U8*)locinput, + UTF8_MAXLEN, &len, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY) != c1) { locinput += len; count++; } } else { - /* count initialised to utf8_distance(old, locinput) */ + /* count initialised to + * utf8_distance(old, locinput) */ while (locinput <= e) { - UV c = utf8_to_uvchr((U8*)locinput, &len); + UV c = utf8n_to_uvchr((U8*)locinput, + UTF8_MAXLEN, &len, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); if (c == c1 || c == c2) break; locinput += len; @@ -3648,7 +3672,10 @@ S_regmatch(pTHX_ regnode *prog) UV c; if (c1 != -1000) { if (do_utf8) - c = utf8_to_uvchr((U8*)PL_reginput, NULL); + c = utf8n_to_uvchr((U8*)PL_reginput, + UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); else c = UCHARAT(PL_reginput); /* If it could work, try it. */ @@ -3695,7 +3722,10 @@ S_regmatch(pTHX_ regnode *prog) while (n >= ln) { if (c1 != -1000) { if (do_utf8) - c = utf8_to_uvchr((U8*)PL_reginput, NULL); + c = utf8n_to_uvchr((U8*)PL_reginput, + UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); else c = UCHARAT(PL_reginput); } @@ -3715,7 +3745,10 @@ S_regmatch(pTHX_ regnode *prog) while (n >= ln) { if (c1 != -1000) { if (do_utf8) - c = utf8_to_uvchr((U8*)PL_reginput, NULL); + c = utf8n_to_uvchr((U8*)PL_reginput, + UTF8_MAXLEN, 0, + ckWARN(WARN_UTF8) ? + 0 : UTF8_ALLOW_ANY); else c = UCHARAT(PL_reginput); } @@ -4297,7 +4330,8 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register b STRLEN len = 0; STRLEN plen; - c = do_utf8 ? utf8_to_uvchr(p, &len) : *p; + c = do_utf8 ? utf8n_to_uvchr(p, UTF8_MAXLEN, &len, + ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY) : *p; plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c)); if (do_utf8 || (flags & ANYOF_UNICODE)) { |