summary refs log tree commit diff
path: root/regcomp.c
diff options
context:
space:
mode:
author Larry Wall <lwall@netlabs.com> 1991-11-05 06:28:06 +0000
committer Larry Wall <lwall@netlabs.com> 1991-11-05 06:28:06 +0000
commit f0fcb5529109ae3ced6c7fbb8cbd77162fa9bfdb (patch)
tree 1f4224435eb95a0b48c30422d7d2f8bf73343aec /regcomp.c
parent 45d8adaa83210dbf286f70ae01d99f534e6c8052 (diff)
download perl-f0fcb5529109ae3ced6c7fbb8cbd77162fa9bfdb.tar.gz
perl 4.0 patch 16: patch #11, continued
See patch #11.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 60
1 files changed, 41 insertions, 19 deletions
diff --git a/regcomp.c b/regcomp.c
index 92e43a3c58..0fd50c0e5e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7,9 +7,15 @@
* blame Henry for some of the lack of readability.
*/
-/* $RCSfile: regcomp.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 11:48:24 $
+/* $RCSfile: regcomp.c,v $$Revision: 4.0.1.3 $$Date: 91/11/05 18:22:28 $
*
* $Log: regcomp.c,v $
+ * Revision 4.0.1.3 91/11/05 18:22:28 lwall
+ * patch11: minimum match length calculation in regexp is now cumulative
+ * patch11: initial .* in pattern had dependency on value of $*
+ * patch11: certain patterns made use of garbage pointers from uncleared memory
+ * patch11: prepared for ctype implementations that don't define isascii()
+ *
* Revision 4.0.1.2 91/06/07 11:48:24 lwall
* patch4: new copyright notice
* patch4: /(x+) \1/ incorrectly optimized to not match "xxx xx"
@@ -22,7 +28,7 @@
* 4.0 baseline.
*
*/
-
+/*SUPPRESS 112*/
/*
* regcomp and regexec -- regsub and regerror are not used in perl
*
@@ -150,6 +156,7 @@ int fold;
int backish;
int backest;
int curback;
+ int minlen;
extern char *safemalloc();
extern char *savestr();
int sawplus = 0;
@@ -168,7 +175,7 @@ int fold;
regnpar = 1;
regsize = 0L;
regcode = &regdummy;
- regc(MAGIC);
+ regc((char)MAGIC);
if (reg(0, &flags) == NULL) {
Safefree(regprecomp);
regprecomp = Nullch;
@@ -193,7 +200,7 @@ int fold;
regparse = exp;
regnpar = 1;
regcode = r->program;
- regc(MAGIC);
+ regc((char)MAGIC);
if (reg(0, &flags) == NULL)
return(NULL);
@@ -233,7 +240,8 @@ int fold;
r->regstclass = first;
else if (OP(first) == BOL ||
(OP(first) == STAR && OP(NEXTOPER(first)) == ANY) ) {
- r->reganch = ROPT_ANCH; /* kinda turn .* into ^.* */
+ /* kinda turn .* into ^.* */
+ r->reganch = ROPT_ANCH | ROPT_IMPLICIT;
first = NEXTOPER(first);
goto again;
}
@@ -259,6 +267,7 @@ int fold;
longish = str_make("",0);
longest = str_make("",0);
len = 0;
+ minlen = 0;
curback = 0;
backish = 0;
backest = 0;
@@ -278,6 +287,7 @@ int fold;
first = scan;
while (OP(t = regnext(scan)) == CLOSE)
scan = t;
+ minlen += *OPERAND(first);
if (curback - backish == len) {
str_ncat(longish, OPERAND(first)+1,
*OPERAND(first));
@@ -303,9 +313,16 @@ int fold;
backest = backish;
}
str_nset(longish,"",0);
+ if (OP(scan) == PLUS &&
+ index(simple,OP(NEXTOPER(scan))))
+ minlen++;
+ else if (OP(scan) == CURLY &&
+ index(simple,OP(NEXTOPER(scan)+4)))
+ minlen += ARG1(scan);
}
else if (index(simple,OP(scan))) {
curback++;
+ minlen++;
len = 0;
if (longish->str_cur > longest->str_cur) {
str_sset(longest,longish);
@@ -328,8 +345,9 @@ int fold;
&&
(!r->regstart
||
- !fbminstr(r->regstart->str_ptr,
- r->regstart->str_ptr + r->regstart->str_cur,
+ !fbminstr((unsigned char*) r->regstart->str_ptr,
+ (unsigned char *) r->regstart->str_ptr
+ + r->regstart->str_cur,
longest)
)
)
@@ -354,8 +372,9 @@ int fold;
r->do_folding = fold;
r->nparens = regnpar - 1;
- New(1002, r->startp, regnpar, char*);
- New(1002, r->endp, regnpar, char*);
+ r->minlen = minlen;
+ Newz(1002, r->startp, regnpar, char*);
+ Newz(1002, r->endp, regnpar, char*);
#ifdef DEBUGGING
if (debug & 512)
regdump(r);
@@ -515,7 +534,7 @@ int *flagp;
if (op == '{' && regcurly(regparse)) {
next = regparse + 1;
max = Nullch;
- while (isdigit(*next) || *next == ',') {
+ while (isDIGIT(*next) || *next == ',') {
if (*next == ',') {
if (max)
break;
@@ -758,7 +777,7 @@ int *flagp;
else {
regsawback = 1;
ret = reganode(REF, num);
- while (isascii(*regparse) && isdigit(*regparse))
+ while (isDIGIT(*regparse))
regparse++;
*flagp |= SIMPLE;
}
@@ -839,14 +858,14 @@ int *flagp;
case 'c':
p++;
ender = *p++;
- if (islower(ender))
+ if (isLOWER(ender))
ender = toupper(ender);
ender ^= 64;
break;
case '0': case '1': case '2': case '3':case '4':
case '5': case '6': case '7': case '8':case '9':
if (*p == '0' ||
- (isdigit(p[1]) && atoi(p) >= regnpar) ) {
+ (isDIGIT(p[1]) && atoi(p) >= regnpar) ) {
ender = scanoct(p, 3, &numlen);
p += numlen;
}
@@ -868,7 +887,7 @@ int *flagp;
ender = *p++;
break;
}
- if (regfold && isupper(ender))
+ if (regfold && isUPPER(ender))
ender = tolower(ender);
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
@@ -992,7 +1011,7 @@ regclass()
break;
case 'c':
class = *regparse++;
- if (islower(class))
+ if (isLOWER(class))
class = toupper(class);
class ^= 64;
break;
@@ -1019,7 +1038,7 @@ regclass()
}
for ( ; lastclass <= class; lastclass++) {
regset(bits,def,lastclass);
- if (regfold && isupper(lastclass))
+ if (regfold && isUPPER(lastclass))
regset(bits,def,tolower(lastclass));
}
lastclass = class;
@@ -1226,13 +1245,13 @@ register char *s;
{
if (*s++ != '{')
return FALSE;
- if (!isdigit(*s))
+ if (!isDIGIT(*s))
return FALSE;
- while (isdigit(*s))
+ while (isDIGIT(*s))
s++;
if (*s == ',')
s++;
- while (isdigit(*s))
+ while (isDIGIT(*s))
s++;
if (*s != '}')
return FALSE;
@@ -1292,9 +1311,12 @@ regexp *r;
fprintf(stderr,"anchored ");
if (r->reganch & ROPT_SKIP)
fprintf(stderr,"plus ");
+ if (r->reganch & ROPT_IMPLICIT)
+ fprintf(stderr,"implicit ");
if (r->regmust != NULL)
fprintf(stderr,"must have \"%s\" back %d ", r->regmust->str_ptr,
r->regback);
+ fprintf(stderr, "minlen %d ", r->minlen);
fprintf(stderr,"\n");
}