diff options
-rw-r--r-- | regcomp.c | 84 | ||||
-rw-r--r-- | regcomp.h | 115 | ||||
-rw-r--r-- | regexec.c | 10 |
3 files changed, 22 insertions, 187 deletions
@@ -429,7 +429,6 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 delta += (minnext + deltanext) * maxcount - minnext * mincount; /* Try powerful optimization CURLYX => CURLYN. */ -#ifdef REGALIGN_STRUCT if ( OP(oscan) == CURLYX && data && data->flags & SF_IN_PAR && !(data->flags & SF_HAS_EVAL) @@ -461,16 +460,11 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */ #endif } -#endif nogo: /* Try optimization CURLYX => CURLYM. */ if ( OP(oscan) == CURLYX && data -#ifdef REGALIGN_STRUCT && !(data->flags & SF_HAS_PAR) -#else - && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) -#endif && !(data->flags & SF_HAS_EVAL) && !deltanext ) { /* XXXX How to optimize if data == 0? */ @@ -483,7 +477,6 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 && (OP(nxt2) != WHILEM)) nxt = nxt2; OP(nxt2) = SUCCEED; /* Whas WHILEM */ -#ifdef REGALIGN_STRUCT /* Need to optimize away parenths. */ if (data->flags & SF_IN_PAR) { /* Set the parenth number. */ @@ -519,7 +512,6 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 study_chunk(&nxt1, &deltanext, nxt, NULL, 0); } else oscan->flags = 0; -#endif } if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) pars++; @@ -573,13 +565,11 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 if (data && (fl & SF_HAS_EVAL)) data->flags |= SF_HAS_EVAL; optimize_curly_tail: -#ifdef REGALIGN if (OP(oscan) != CURLYX) { while (regkind[(U8)OP(next = regnext(oscan))] == NOTHING && NEXT_OFF(next)) NEXT_OFF(oscan) += NEXT_OFF(next); } -#endif continue; default: /* REF only? */ if (flags & SCF_DO_SUBSTR) { @@ -624,11 +614,9 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 } else if (OP(scan) == OPEN) { pars++; } else if (OP(scan) == CLOSE && ARG(scan) == is_par) { -#ifdef REGALIGN_STRUCT next = regnext(scan); if ( next && (OP(next) != WHILEM) && next < last) -#endif is_par = 0; /* Disable optimization */ } else if (OP(scan) == EVAL) { if (data) @@ -758,15 +746,10 @@ pregcomp(char *exp, char *xend, PMOP *pm) /* Small enough for pointer-storage convention? If extralen==0, this means that we will not need long jumps. */ -#ifndef REGALIGN_STRUCT - if (regsize >= 0x10000L && extralen) - FAIL("regexp too big"); -#else if (regsize >= 0x10000L && extralen) regsize += extralen; else extralen = 0; -#endif /* Allocate space and initialize. */ Newc(1001, r, sizeof(regexp) + (unsigned)regsize * sizeof(regnode), @@ -993,9 +976,6 @@ reg(I32 paren, I32 *flagp) ret = NULL; /* For look-ahead/behind. */ switch (paren) { case '<': -#ifndef REGALIGN_STRUCT - FAIL("lookbehind non-implemented without REGALIGN_STRUCT"); -#endif regseen |= REG_SEEN_LOOKBEHIND; if (*regcomp_parse == '!') paren = ','; @@ -1224,9 +1204,7 @@ reg(I32 paren, I32 *flagp) if (paren == '>') node = SUSPEND, flag = 0; reginsert(node,ret); -#ifdef REGALIGN_STRUCT ret->flags = flag; -#endif regtail(ret, reg_node(TAIL)); } } @@ -1387,9 +1365,7 @@ regpiece(I32 *flagp) if (SIZE_ONLY) extralen += 3; } -#ifdef REGALIGN_STRUCT ret->flags = 0; -#endif if (min > 0) *flagp = (WORST|HASWIDTH); @@ -1420,9 +1396,7 @@ regpiece(I32 *flagp) if (op == '*' && (flags&SIMPLE)) { reginsert(STAR, ret); -#ifdef REGALIGN_STRUCT ret->flags = 0; -#endif regnaughty += 4; } else if (op == '*') { @@ -1430,9 +1404,7 @@ regpiece(I32 *flagp) goto do_curly; } else if (op == '+' && (flags&SIMPLE)) { reginsert(PLUS, ret); -#ifdef REGALIGN_STRUCT ret->flags = 0; -#endif regnaughty += 3; } else if (op == '+') { @@ -1451,11 +1423,7 @@ regpiece(I32 *flagp) if (*regcomp_parse == '?') { nextchar(); reginsert(MINMOD, ret); -#ifdef REGALIGN regtail(ret, ret + NODE_STEP_REGNODE); -#else - regtail(ret, ret + 3); -#endif } if (ISMULT2(regcomp_parse)) FAIL("nested *?+ in regexp"); @@ -1787,9 +1755,7 @@ tryagain: *OPERAND(ret) = len; regc('\0', s++); if (SIZE_ONLY) { -#ifdef REGALIGN_STRUCT regsize += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode); -#endif } else { regcode += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode); } @@ -2062,11 +2028,7 @@ reg_node(U8 op) ret = regcode; if (SIZE_ONLY) { SIZE_ALIGN(regsize); -#ifdef REGALIGN_STRUCT regsize += 1; -#else - regsize += 3; -#endif return(ret); } @@ -2090,11 +2052,7 @@ reganode(U8 op, U32 arg) ret = regcode; if (SIZE_ONLY) { SIZE_ALIGN(regsize); -#ifdef REGALIGN regsize += 2; -#else - regsize += 5; -#endif return(ret); } @@ -2146,9 +2104,6 @@ reginsert(U8 op, regnode *opnd) src = NEXTOPER(place); FILL_ADVANCE_NODE(place, op); Zero(src, offset, regnode); -#if defined(REGALIGN) && !defined(REGALIGN_STRUCT) - src[offset + 1] = '\177'; -#endif } /* @@ -2173,27 +2128,11 @@ regtail(regnode *p, regnode *val) scan = temp; } -#ifdef REGALIGN -# ifdef REGALIGN_STRUCT if (reg_off_by_arg[OP(scan)]) { ARG_SET(scan, val - scan); } else { NEXT_OFF(scan) = val - scan; } -# else - offset = val - scan; -# ifndef lint - *(short*)(scan+1) = offset; -# endif -#endif -#else - if (OP(scan) == BACK) - offset = scan - val; - else - offset = val - scan; - *(scan+1) = (offset>>8)&0377; - *(scan+2) = offset&0377; -#endif } /* @@ -2441,18 +2380,10 @@ regprop(SV *sv, regnode *o) sv_catpvf(sv, "CURLY {%d,%d}", ARG1(o), ARG2(o)); break; case CURLYM: -#ifdef REGALIGN_STRUCT sv_catpvf(sv, "CURLYM[%d] {%d,%d}", o->flags, ARG1(o), ARG2(o)); -#else - sv_catpvf(sv, "CURLYM {%d,%d}", ARG1(o), ARG2(o)); -#endif break; case CURLYN: -#ifdef REGALIGN_STRUCT sv_catpvf(sv, "CURLYN[%d] {%d,%d}", o->flags, ARG1(o), ARG2(o)); -#else - sv_catpvf(sv, "CURLYN {%d,%d}", ARG1(o), ARG2(o)); -#endif break; case CURLYX: sv_catpvf(sv, "CURLYX {%d,%d}", ARG1(o), ARG2(o)); @@ -2486,18 +2417,10 @@ regprop(SV *sv, regnode *o) p = "GPOS"; break; case UNLESSM: -#ifdef REGALIGN_STRUCT sv_catpvf(sv, "UNLESSM[-%d]", o->flags); -#else - p = "UNLESSM"; -#endif break; case IFMATCH: -#ifdef REGALIGN_STRUCT sv_catpvf(sv, "IFMATCH[-%d]", o->flags); -#else - p = "IFMATCH"; -#endif break; case SUCCEED: p = "SUCCEED"; @@ -2628,14 +2551,7 @@ regnext(register regnode *p) if (offset == 0) return(NULL); -#ifdef REGALIGN return(p+offset); -#else - if (OP(p) == BACK) - return(p-offset); - else - return(p+offset); -#endif } STATIC void @@ -33,18 +33,6 @@ typedef OP OP_4tree; /* Will be redefined later. */ * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.] */ -/* #ifndef gould */ -/* #ifndef cray */ -/* #ifndef eta10 */ -#define REGALIGN -/* #endif */ -/* #endif */ -/* #endif */ - -#ifdef REGALIGN -# define REGALIGN_STRUCT -#endif - /* * Structure for regexp "program". This is essentially a linear encoding * of a nondeterministic finite-state machine (aka syntax charts or @@ -112,8 +100,8 @@ typedef OP OP_4tree; /* Will be redefined later. */ #define REFF 46 /* num Match already matched string, folded */ #define REFFL 47 /* num Match already matched string, folded in loc. */ #define EVAL 48 /* evl Execute some Perl code. */ -#define LONGJMP 49 /* off Jump far away, requires REGALIGN_STRUCT. */ -#define BRANCHJ 50 /* off BRANCH with long offset, requires REGALIGN_STRUCT. */ +#define LONGJMP 49 /* off Jump far away. */ +#define BRANCHJ 50 /* off BRANCH with long offset. */ #define IFTHEN 51 /* off Switch, should be preceeded by switcher . */ #define GROUPP 52 /* num Whether the group matched. */ #define LOGICAL 53 /* no Next opcode should set the flag only. */ @@ -239,14 +227,12 @@ EXTCONST char simple[] = { * Using two bytes for the "next" pointer is vast overkill for most things, * but allows patterns to get big without disasters. * - * [If REGALIGN is defined, the "next" pointer is always aligned on an even + * [The "next" pointer is always aligned on an even * boundary, and reads the offset directly as a short. Also, there is no * special test to reverse the sign of BACK pointers since the offset is * stored negative.] */ -#ifdef REGALIGN_STRUCT - struct regnode_string { U8 flags; U8 type; @@ -269,8 +255,6 @@ struct regnode_2 { U16 arg2; }; -#endif - /* XXX fix this description. Impose a limit of REG_INFTY on various pattern matching operations to limit stack growth and to avoid "infinite" recursions. @@ -293,13 +277,8 @@ struct regnode_2 { # define REG_INFTY I16_MAX #endif -#ifdef REGALIGN -# define ARG_VALUE(arg) (arg) -# define ARG__SET(arg,val) ((arg) = (val)) -#else -# define ARG_VALUE(arg) (((*((char*)&arg)&0377)<<8) + (*(((char*)&arg)+1)&0377)) -# define ARG__SET(arg,val) (((char*)&arg)[0] = (val) >> 8; ((char*)&arg)[1] = (val) & 0377;) -#endif +#define ARG_VALUE(arg) (arg) +#define ARG__SET(arg,val) ((arg) = (val)) #define ARG(p) ARG_VALUE(ARG_LOC(p)) #define ARG1(p) ARG_VALUE(ARG1_LOC(p)) @@ -309,69 +288,35 @@ struct regnode_2 { #define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val)) #ifndef lint -# ifdef REGALIGN -# ifdef REGALIGN_STRUCT -# define NEXT_OFF(p) ((p)->next_off) -# define NODE_ALIGN(node) -# define NODE_ALIGN_FILL(node) ((node)->flags = 0xde) /* deadbeef */ -# else -# define NEXT_OFF(p) (*(short*)(p+1)) -# define NODE_ALIGN(node) ((!((long)node & 1)) ? node++ : 0) -# define NODE_ALIGN_FILL(node) ((!((long)node & 1)) ? *node++ = 127 : 0) -# endif -# else -# define NEXT_OFF(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -# define NODE_ALIGN(node) -# define NODE_ALIGN_FILL(node) -# endif +# define NEXT_OFF(p) ((p)->next_off) +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) ((node)->flags = 0xde) /* deadbeef */ #else /* lint */ # define NEXT_OFF(p) 0 -# define NODE_ALIGN(node) -# define NODE_ALIGN_FILL(node) +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) #endif /* lint */ #define SIZE_ALIGN NODE_ALIGN -#ifdef REGALIGN_STRUCT -# define OP(p) ((p)->type) -# define OPERAND(p) (((struct regnode_string *)p)->string) -# define NODE_ALIGN(node) -# define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) -# define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) -# define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) -# define NODE_STEP_REGNODE 1 /* sizeof(regnode)/sizeof(regnode) */ -# define EXTRA_STEP_2ARGS EXTRA_SIZE(struct regnode_2) -#else -# define OP(p) (*(p)) -# define OPERAND(p) ((p) + 3) -# define ARG_LOC(p) (*(unsigned short*)(p+3)) -# define ARG1_LOC(p) (*(unsigned short*)(p+3)) -# define ARG2_LOC(p) (*(unsigned short*)(p+5)) -typedef char* regnode; -# define NODE_STEP_REGNODE NODE_STEP_B -# define EXTRA_STEP_2ARGS 4 -#endif - -#ifdef REGALIGN -# define NODE_STEP_B 4 -#else -# define NODE_STEP_B 3 -#endif +#define OP(p) ((p)->type) +#define OPERAND(p) (((struct regnode_string *)p)->string) +#define NODE_ALIGN(node) +#define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) +#define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) +#define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) +#define NODE_STEP_REGNODE 1 /* sizeof(regnode)/sizeof(regnode) */ +#define EXTRA_STEP_2ARGS EXTRA_SIZE(struct regnode_2) + +#define NODE_STEP_B 4 #define NEXTOPER(p) ((p) + NODE_STEP_REGNODE) #define PREVOPER(p) ((p) - NODE_STEP_REGNODE) -#ifdef REGALIGN_STRUCT -# define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ +#define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ (ptr)->type = op; (ptr)->next_off = 0; (ptr)++; } STMT_END -# define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ +#define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END -#else -# define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ - *(ptr)++ = op; *(ptr)++ = '\0'; *(ptr)++ = '\0'; } STMT_END -# define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ - ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 2; } STMT_END -#endif #define MAGIC 0234 @@ -394,12 +339,7 @@ typedef char* regnode; #define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c)) #define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c)) -#ifdef REGALIGN_STRUCT #define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1) -#else -#define ANY_SKIP 32 /* overwrite the first byte of - * the next guy. */ -#endif /* * Utility definitions. @@ -421,7 +361,6 @@ typedef char* regnode; #ifdef REG_COMP_C const static U8 regarglen[] = { -# ifdef REGALIGN_STRUCT 0,0,0,0,0,0,0,0,0,0, /*CURLY*/ EXTRA_SIZE(struct regnode_2), /*CURLYX*/ EXTRA_SIZE(struct regnode_2), @@ -446,16 +385,6 @@ const static U8 regarglen[] = { /*LOGICAL*/ 0, /*SUSPEND*/ EXTRA_SIZE(struct regnode_1), /*RENUM*/ EXTRA_SIZE(struct regnode_1), 0, -# else - 0,0,0,0,0,0,0,0,0,0, - /*CURLY*/ 4, /*CURLYX*/ 4, - 0,0,0,0,0,0,0,0,0,0,0,0, - /*REF*/ 2, /*OPEN*/ 2, /*CLOSE*/ 2, - 0,0, /*IFMATCH*/ 2, /*UNLESSM*/ 2, - 0,0,0,0,0,0,0,0,0,0,0,0,/*CURLYM*/ 4,/*CURLYN*/ 4, - 0, /*REFF*/ 2, /*REFFL*/ 2, /*EVAL*/ 2, /*LONGJMP*/ 2, /*BRANCHJ*/ 2, - /*IFTHEN*/ 2, /*GROUPP*/ 2, /*LOGICAL*/ 0, /*RENUM*/ 2, /*RENUM*/ 2, 0, -# endif }; const static char reg_off_by_arg[] = { @@ -776,13 +776,9 @@ regmatch(regnode *prog) SvPVX(prop)); } ); -#ifdef REGALIGN next = scan + NEXT_OFF(scan); if (next == scan) next = NULL; -#else - next = regnext(scan); -#endif switch (OP(scan)) { case BOL: @@ -1242,15 +1238,11 @@ regmatch(regnode *prog) regendp[n] = 0; *reglastparen = n; scan = next; -#ifdef REGALIGN /*SUPPRESS 560*/ if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))) next += n; else next = NULL; -#else - next = regnext(next); -#endif inner = NEXTOPER(scan); if (c1 == BRANCHJ) { inner = NEXTOPER(inner); @@ -1274,7 +1266,6 @@ regmatch(regnode *prog) and has no parenths to influence future backrefs. */ ln = ARG1(scan); /* min to match */ n = ARG2(scan); /* max to match */ -#ifdef REGALIGN_STRUCT paren = scan->flags; if (paren) { if (paren > regsize) @@ -1282,7 +1273,6 @@ regmatch(regnode *prog) if (paren > *reglastparen) *reglastparen = paren; } -#endif scan = NEXTOPER(scan) + NODE_STEP_REGNODE; if (paren) scan += NEXT_OFF(scan); /* Skip former OPEN. */ |