summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2004-07-09 03:35:22 +0300
committerNicholas Clark <nick@ccl4.org>2004-07-09 21:03:11 +0000
commit078c425b721ea35d568a7ec980e3b95d036a25c3 (patch)
treecff697fcf7c4b60e239516385829f29dd0c26a4a /regexec.c
parentcd793d32bae0d3b42e0c38fe944828bcfc96d2aa (diff)
downloadperl-078c425b721ea35d568a7ec980e3b95d036a25c3.tar.gz
Re: Segfault using HTML::Entities
Message-ID: <40EDBE1A.6080205@iki.fi> Date: Fri, 09 Jul 2004 00:35:22 +0300 p4raw-id: //depot/perl@23074
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c61
1 files changed, 31 insertions, 30 deletions
diff --git a/regexec.c b/regexec.c
index 728b1ae97e..60276cb44d 100644
--- a/regexec.c
+++ b/regexec.c
@@ -954,6 +954,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
char *m;
STRLEN ln;
STRLEN lnc;
+ register STRLEN uskip;
unsigned int c1;
unsigned int c2;
char *e;
@@ -964,7 +965,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
switch (OP(c)) {
case ANYOF:
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(c, (U8*)s, 0, do_utf8) :
@@ -976,7 +977,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1172,7 +1173,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == BOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
@@ -1181,7 +1182,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if ((norun || regtry(prog, s)))
goto got_it;
}
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1215,14 +1216,14 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == NBOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
else if ((norun || regtry(prog, s)))
goto got_it;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1244,7 +1245,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case ALNUM:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1253,7 +1254,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1273,7 +1274,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case ALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isALNUM_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1282,7 +1283,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1302,7 +1303,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case NALNUM:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1311,7 +1312,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1331,7 +1332,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case NALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1340,7 +1341,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1360,7 +1361,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case SPACE:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1369,7 +1370,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1389,7 +1390,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case SPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1398,7 +1399,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1418,7 +1419,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case NSPACE:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1427,7 +1428,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1447,7 +1448,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case NSPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1456,7 +1457,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1476,7 +1477,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case DIGIT:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1485,7 +1486,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1505,7 +1506,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case DIGITL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1514,7 +1515,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1534,7 +1535,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case NDIGIT:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1543,7 +1544,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
@@ -1563,7 +1564,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
case NDIGITL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
@@ -1572,7 +1573,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {