summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>1998-05-13 09:47:11 +0000
committerGurusamy Sarathy <gsar@cpan.org>1998-05-13 09:47:11 +0000
commitae5c130cf43baa916c1292cd85a40d054824ba20 (patch)
treeab127484971fa5731259a6d386ac1b6ccfc334cb
parentca48fea705cdd276964538988256b00e487bc483 (diff)
downloadperl-ae5c130cf43baa916c1292cd85a40d054824ba20.tar.gz
[win32] merge change#664 from maint branch
p4raw-link: @664 on //depot/maint-5.004/perl: c3ae1fa52acf9130fcc1770ad2fce8519766b744 p4raw-id: //depot/win32/perl@926
-rw-r--r--regcomp.c104
-rw-r--r--regcomp.h7
-rw-r--r--regexec.c14
3 files changed, 73 insertions, 52 deletions
diff --git a/regcomp.c b/regcomp.c
index 0f48976db8..8d66f387ab 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -127,7 +127,6 @@ static regnode *reg_node _((U8));
static regnode *regpiece _((I32 *));
static void reginsert _((U8, regnode *));
static void regoptail _((regnode *, regnode *));
-static void regset _((char *, I32));
static void regtail _((regnode *, regnode *));
static char* regwhite _((char *, char *));
static char* nextchar _((void));
@@ -1831,15 +1830,6 @@ regwhite(char *p, char *e)
return p;
}
-static void
-regset(char *opnd, register I32 c)
-{
- if (SIZE_ONLY)
- return;
- c &= 0xFF;
- opnd[1 + (c >> 3)] |= (1 << (c & 7));
-}
-
static regnode *
regclass(void)
{
@@ -1903,63 +1893,67 @@ regclass(void)
Class = UCHARAT(regparse++);
switch (Class) {
case 'w':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_ALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'W':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NALNUML;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isALNUM(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isALNUM(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 's':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_SPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'S':
- if (regflags & PMf_LOCALE) {
- if (!SIZE_ONLY)
+ if (!SIZE_ONLY) {
+ if (regflags & PMf_LOCALE)
*opnd |= ANYOF_NSPACEL;
- }
- else {
- for (Class = 0; Class < 256; Class++)
- if (!isSPACE(Class))
- regset(opnd, Class);
+ else {
+ for (Class = 0; Class < 256; Class++)
+ if (!isSPACE(Class))
+ ANYOF_SET(opnd, Class);
+ }
}
lastclass = 1234;
continue;
case 'd':
- for (Class = '0'; Class <= '9'; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = '0'; Class <= '9'; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'D':
- for (Class = 0; Class < '0'; Class++)
- regset(opnd, Class);
- for (Class = '9' + 1; Class < 256; Class++)
- regset(opnd, Class);
+ if (!SIZE_ONLY) {
+ for (Class = 0; Class < '0'; Class++)
+ ANYOF_SET(opnd, Class);
+ for (Class = '9' + 1; Class < 256; Class++)
+ ANYOF_SET(opnd, Class);
+ }
lastclass = 1234;
continue;
case 'n':
@@ -2012,13 +2006,31 @@ regclass(void)
continue; /* do it next time */
}
}
- for ( ; lastclass <= Class; lastclass++)
- regset(opnd, lastclass);
+ if (!SIZE_ONLY) {
+ for ( ; lastclass <= Class; lastclass++)
+ ANYOF_SET(opnd, lastclass);
+ }
lastclass = Class;
}
if (*regparse != ']')
FAIL("unmatched [] in regexp");
nextchar();
+ /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
+ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ for (Class = 0; Class < 256; ++Class) {
+ if (ANYOF_TEST(opnd, Class)) {
+ I32 cf = fold[Class];
+ ANYOF_SET(opnd, cf);
+ }
+ }
+ *opnd &= ~ANYOF_FOLD;
+ }
+ /* optimize inverted simple patterns (e.g. [^a-z]) */
+ if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) {
+ for (Class = 0; Class < 32; ++Class)
+ opnd[1 + Class] ^= 0xFF;
+ *opnd = 0;
+ }
return ret;
}
diff --git a/regcomp.h b/regcomp.h
index 4b86a8d781..0bd00e2a6d 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -370,6 +370,13 @@ typedef char* regnode;
#define ANYOF_SPACEL 0x02
#define ANYOF_NSPACEL 0x01
+/* Utility macros for bitmap of ANYOF */
+#define ANYOF_BYTE(p,c) (p)[1 + (((c) >> 3) & 31)]
+#define ANYOF_BIT(c) (1 << ((c) & 7))
+#define ANYOF_SET(p,c) (ANYOF_BYTE(p,c) |= ANYOF_BIT(c))
+#define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c))
+#define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c))
+
#ifdef REGALIGN_STRUCT
#define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1)
#else
diff --git a/regexec.c b/regexec.c
index 250704c228..a9f2751cda 100644
--- a/regexec.c
+++ b/regexec.c
@@ -114,9 +114,11 @@ static I32 regmatch _((regnode *prog));
static I32 regrepeat _((regnode *p, I32 max));
static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
static I32 regtry _((regexp *prog, char *startpos));
+
static bool reginclass _((char *p, I32 c));
static CHECKPOINT regcppush _((I32 parenfloor));
static char * regcppop _((void));
+#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
static CHECKPOINT
regcppush(I32 parenfloor)
@@ -422,7 +424,7 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, cha
case ANYOF:
Class = (char *) OPERAND(c);
while (s < strend) {
- if (reginclass(Class, *s)) {
+ if (REGINCLASS(Class, *s)) {
if (tmp && regtry(prog, s))
goto got_it;
else
@@ -890,7 +892,7 @@ regmatch(regnode *prog)
s = (char *) OPERAND(scan);
if (nextchar < 0)
nextchar = UCHARAT(locinput);
- if (!reginclass(s, nextchar))
+ if (!REGINCLASS(s, nextchar))
sayNO;
if (!nextchar && locinput >= regeol)
sayNO;
@@ -1663,7 +1665,7 @@ regrepeat(regnode *p, I32 max)
scan++;
break;
case ANYOF:
- while (scan < loceol && reginclass(opnd, *scan))
+ while (scan < loceol && REGINCLASS(opnd, *scan))
scan++;
break;
case ALNUM:
@@ -1774,7 +1776,7 @@ reginclass(register char *p, register I32 c)
bool match = FALSE;
c &= 0xFF;
- if (p[1 + (c >> 3)] & (1 << (c & 7)))
+ if (ANYOF_TEST(p, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
I32 cf;
@@ -1784,7 +1786,7 @@ reginclass(register char *p, register I32 c)
}
else
cf = fold[c];
- if (p[1 + (cf >> 3)] & (1 << (cf & 7)))
+ if (ANYOF_TEST(p, cf))
match = TRUE;
}
@@ -1800,7 +1802,7 @@ reginclass(register char *p, register I32 c)
}
}
- return match ^ ((flags & ANYOF_INVERT) != 0);
+ return (flags & ANYOF_INVERT) ? !match : match;
}