diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-27 17:40:33 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-27 17:40:33 +0000 |
commit | 4d715f1b6035e095635067d977ad56948ff4e4c2 (patch) | |
tree | 5d6a416fd53c5c8ec7fa4123a5cc7e0874b97434 | |
parent | af0099785014bcb1968b7665a8a6f85cd299bb8f (diff) | |
download | pcre-4d715f1b6035e095635067d977ad56948ff4e4c2.tar.gz |
Update those opcode sizes which depend on IMM2_SIZE
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@769 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | pcre_compile.c | 38 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 55 | ||||
-rw-r--r-- | pcre_exec.c | 36 | ||||
-rw-r--r-- | pcre_internal.h | 45 | ||||
-rw-r--r-- | pcre_jit_compile.c | 82 | ||||
-rw-r--r-- | pcre_printint.src | 13 | ||||
-rw-r--r-- | pcre_study.c | 32 |
7 files changed, 157 insertions, 144 deletions
diff --git a/pcre_compile.c b/pcre_compile.c index b0e6367..1664506 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -1615,7 +1615,7 @@ for (;;) case OP_ONCE: case OP_ONCE_NC: case OP_COND: - d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd); + d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf8, atend, cd); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -1721,7 +1721,7 @@ for (;;) case OP_NOTEXACT: case OP_NOTEXACTI: branchlength += GET2(cc,1); - cc += 4; + cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UTF8 if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; #endif @@ -1729,8 +1729,8 @@ for (;;) case OP_TYPEEXACT: branchlength += GET2(cc,1); - if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; - cc += 4; + if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; + cc += 1 + IMM2_SIZE + 1; break; /* Handle single-char matchers */ @@ -1786,9 +1786,9 @@ for (;;) case OP_CRRANGE: case OP_CRMINRANGE: - if (GET2(cc,1) != GET2(cc,3)) return -1; + if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; branchlength += GET2(cc,1); - cc += 5; + cc += 1 + 2 * IMM2_SIZE; break; default: @@ -1967,7 +1967,8 @@ for (;;) case OP_TYPEMINUPTO: case OP_TYPEEXACT: case OP_TYPEPOSUPTO: - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; + if (code[1 + IMM2_SIZE] == OP_PROP + || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; break; case OP_MARK: @@ -2086,7 +2087,8 @@ for (;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; + if (code[1 + IMM2_SIZE] == OP_PROP + || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; break; case OP_MARK: @@ -2383,7 +2385,8 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; + if (code[1 + IMM2_SIZE] == OP_PROP + || code[1 + IMM2_SIZE] == 
OP_NOTPROP) code += 2; break; /* End of branch */ @@ -2420,7 +2423,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); case OP_MINUPTOI: case OP_POSUPTO: case OP_POSUPTOI: - if (utf8 && code[3] >= 0xc0) code += PRIV(utf8_table4)[code[3] & 0x3f]; + if (utf8 && code[1 + IMM2_SIZE] >= 0xc0) code += PRIV(utf8_table4)[code[1 + IMM2_SIZE] & 0x3f]; break; #endif @@ -5181,7 +5184,8 @@ for (;; ptr++) if (*tempcode == OP_TYPEEXACT) tempcode += PRIV(OP_lengths)[*tempcode] + - ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0); + ((tempcode[1 + IMM2_SIZE] == OP_PROP + || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) { @@ -5420,10 +5424,10 @@ for (;; ptr++) break; /* Most other conditions use OP_CREF (a couple change to OP_RREF - below), and all need to skip 3 bytes at the start of the group. */ + below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */ code[1+LINK_SIZE] = OP_CREF; - skipbytes = 3; + skipbytes = 1+IMM2_SIZE; refsign = -1; /* Check for a test for recursion in a named group. */ @@ -6164,7 +6168,7 @@ for (;; ptr++) NUMBERED_GROUP: cd->bracount += 1; PUT2(code, 1+LINK_SIZE, cd->bracount); - skipbytes = 2; + skipbytes = IMM2_SIZE; } /* Process nested bracketed regex. Assertions used not to be repeatable, @@ -7165,7 +7169,7 @@ register int c = -1; do { int d; int xl = (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0; + *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 
IMM2_SIZE:0; const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); register int op = *scode; @@ -7191,7 +7195,7 @@ do { break; case OP_EXACT: - scode += 2; + scode += IMM2_SIZE; /* Fall through */ case OP_CHAR: @@ -7204,7 +7208,7 @@ do { break; case OP_EXACTI: - scode += 2; + scode += IMM2_SIZE; /* Fall through */ case OP_CHARI: diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 28f4f5b..ea5b00c 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -442,7 +442,8 @@ new_count = 0; first_op = this_start_code + 1 + LINK_SIZE + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0); + *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) + ? IMM2_SIZE:0); /* The first thing in any (sub) pattern is a bracket of some sort. Push all the alternative states onto the list, and find out where the end is. This @@ -542,8 +543,8 @@ else { int length = 1 + LINK_SIZE + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? - 2:0); + *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) + ? 
IMM2_SIZE:0); do { ADD_NEW((int)(end_code - start_code + length), 0); @@ -556,7 +557,7 @@ else workspace[0] = 0; /* Bit indicating which vector is current */ -DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code)); +DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code))); /* Loop for scanning the subject */ @@ -816,7 +817,7 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_CBRA: case OP_SCBRA: - ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE), 0); + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0); code += GET(code, 1); while (*code == OP_ALT) { @@ -1157,7 +1158,7 @@ for (;;) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) { if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 4, 0); } + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } else { ADD_NEW(state_offset, count); } } @@ -1168,7 +1169,7 @@ for (;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - ADD_ACTIVE(state_offset + 4, 0); + ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1183,7 +1184,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 4, 0); } + { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -1719,13 +1720,13 @@ for (;;) case OP_PROP_EXTRA + OP_TYPEMINUPTO: case OP_PROP_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 6, 0); } + { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { BOOL OK; const ucd_record * prop = GET_UCD(c); - switch(code[4]) + switch(code[1 + IMM2_SIZE + 1]) { case PT_ANY: OK = TRUE; @@ -1737,15 +1738,15 @@ for (;;) break; case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[5]; + OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; break; case PT_PC: - 
OK = prop->chartype == code[5]; + OK = prop->chartype == code[1 + IMM2_SIZE + 2]; break; case PT_SC: - OK = prop->script == code[5]; + OK = prop->script == code[1 + IMM2_SIZE + 2]; break; /* These are specials for combination cases. */ @@ -1787,7 +1788,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 6, 0); } + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } else { ADD_NEW(state_offset, count); } } @@ -1800,7 +1801,7 @@ for (;;) case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0 && UCD_CATEGORY(c) != ucp_M) { @@ -1821,7 +1822,7 @@ for (;;) nptr += ndlen; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } else { ADD_NEW_DATA(-state_offset, count, ncount); } } @@ -1834,7 +1835,7 @@ for (;;) case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1861,7 +1862,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } else { ADD_NEW_DATA(-state_offset, count, ncount); } break; @@ -1878,7 +1879,7 @@ for (;;) case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1907,7 +1908,7 @@ for (;;) 
next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } else { ADD_NEW_DATA(-state_offset, count, 0); } } @@ -1920,7 +1921,7 @@ for (;;) case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1962,7 +1963,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } else { ADD_NEW_DATA(-state_offset, count, 0); } } @@ -2352,7 +2353,7 @@ for (;;) if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + dlen + 3, 0); } + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -2375,7 +2376,7 @@ for (;;) case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTPOSUPTO: - ADD_ACTIVE(state_offset + dlen + 3, 0); + ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0); count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -2401,7 +2402,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + dlen + 3, 0); } + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -2474,12 +2475,12 @@ for (;;) case OP_CRMINRANGE: count = current_state->count; /* Already matched */ if (count >= GET2(ecode, 1)) - { ADD_ACTIVE(next_state_offset + 5, 0); } + { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } if (isinclass) { int max = GET2(ecode, 3); if (++count >= max && max != 0) /* Max 0 => no limit */ - { ADD_NEW(next_state_offset + 5, 0); } + { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } diff --git 
a/pcre_exec.c b/pcre_exec.c index 4aa4a0a..41a2482 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -1308,7 +1308,7 @@ for (;;) /* Chose branch according to the condition */ - ecode += condition? 3 : GET(ecode, 1); + ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); } } @@ -1376,7 +1376,7 @@ for (;;) /* Chose branch according to the condition */ - ecode += condition? 3 : GET(ecode, 1); + ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1); } else if (condcode == OP_DEF) /* DEFINE - always false */ @@ -1468,7 +1468,7 @@ for (;;) md->offset_vector[offset+1] = (int)(eptr - md->start_subject); if (offset_top <= offset) offset_top = offset + 2; } - ecode += 3; + ecode += 1 + IMM2_SIZE; break; @@ -2567,7 +2567,7 @@ for (;;) case OP_REFI: caseless = op == OP_REFI; offset = GET2(ecode, 1) << 1; /* Doubled ref number */ - ecode += 3; + ecode += 1 + IMM2_SIZE; /* If the reference is unset, there are two possibilities: @@ -2607,9 +2607,9 @@ for (;;) case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); - max = GET2(ecode, 3); + max = GET2(ecode, 1 + IMM2_SIZE); if (max == 0) max = INT_MAX; - ecode += 5; + ecode += 1 + 2 * IMM2_SIZE; break; default: /* No repeat follows */ @@ -2728,9 +2728,9 @@ for (;;) case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); - max = GET2(ecode, 3); + max = GET2(ecode, 1 + IMM2_SIZE); if (max == 0) max = INT_MAX; - ecode += 5; + ecode += 1 + 2 * IMM2_SIZE; break; default: /* No repeat follows */ @@ -2929,9 +2929,9 @@ for (;;) case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); - max = GET2(ecode, 3); + max = GET2(ecode, 1 + IMM2_SIZE); if (max == 0) max = INT_MAX; - ecode += 5; + ecode += 1 + 2 * IMM2_SIZE; break; default: /* No repeat follows */ @@ -3104,7 +3104,7 @@ for (;;) case OP_EXACT: case OP_EXACTI: min = max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATCHAR; case OP_POSUPTO: @@ -3119,7 +3119,7 @@ for (;;) min = 0; max = GET2(ecode, 1); 
minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATCHAR; case OP_POSSTAR: @@ -3439,7 +3439,7 @@ for (;;) case OP_NOTEXACT: case OP_NOTEXACTI: min = max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATNOTCHAR; case OP_NOTUPTO: @@ -3449,7 +3449,7 @@ for (;;) min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATNOTCHAR; case OP_NOTPOSSTAR: @@ -3481,7 +3481,7 @@ for (;;) possessive = TRUE; min = 0; max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATNOTCHAR; case OP_NOTSTAR: @@ -3805,7 +3805,7 @@ for (;;) case OP_TYPEEXACT: min = max = GET2(ecode, 1); minimize = TRUE; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATTYPE; case OP_TYPEUPTO: @@ -3813,7 +3813,7 @@ for (;;) min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_TYPEMINUPTO; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATTYPE; case OP_TYPEPOSSTAR: @@ -3841,7 +3841,7 @@ for (;;) possessive = TRUE; min = 0; max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATTYPE; case OP_TYPESTAR: diff --git a/pcre_internal.h b/pcre_internal.h index ec7a9ff..0228207 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1680,30 +1680,35 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 2, /* noti */ \ /* Positive single-char repeats ** These are */ \ 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? 
** minima in */ \ - 4, 4, 4, /* upto, minupto, exact ** mode */ \ - 2, 2, 2, 4, /* *+, ++, ?+, upto+ */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \ + 2+IMM2_SIZE, /* exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ - 4, 4, 4, /* upto I, minupto I, exact I */ \ - 2, 2, 2, 4, /* *+I, ++I, ?+I, upto+I */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ + 2+IMM2_SIZE, /* exact I */ \ + 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ /* Negative single-char repeats - only for chars < 256 */ \ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ - 4, 4, 4, /* NOT upto, minupto, exact */ \ - 2, 2, 2, 4, /* Possessive NOT *, +, ?, upto */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ + 2+IMM2_SIZE, /* NOT exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ - 4, 4, 4, /* NOT upto I, minupto I, exact I */ \ - 2, 2, 2, 4, /* Possessive NOT *I, +I, ?I, upto I */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ + 2+IMM2_SIZE, /* NOT exact I */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ /* Positive type repeats */ \ 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ - 4, 4, 4, /* Type upto, minupto, exact */ \ - 2, 2, 2, 4, /* Possessive *+, ++, ?+, upto+ */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ + 2+IMM2_SIZE, /* Type exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ /* Character class & ref repeats */ \ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? 
*/ \ - 5, 5, /* CRRANGE, CRMINRANGE */ \ + 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ 33, /* CLASS */ \ 33, /* NCLASS */ \ 0, /* XCLASS - variable length */ \ - 3, /* REF */ \ - 3, /* REFI */ \ + 1+IMM2_SIZE, /* REF */ \ + 1+IMM2_SIZE, /* REFI */ \ 1+LINK_SIZE, /* RECURSE */ \ 2+2*LINK_SIZE, /* CALLOUT */ \ 1+LINK_SIZE, /* Alt */ \ @@ -1720,23 +1725,23 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+LINK_SIZE, /* ONCE_NC */ \ 1+LINK_SIZE, /* BRA */ \ 1+LINK_SIZE, /* BRAPOS */ \ - 3+LINK_SIZE, /* CBRA */ \ - 3+LINK_SIZE, /* CBRAPOS */ \ + 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ + 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ 1+LINK_SIZE, /* COND */ \ 1+LINK_SIZE, /* SBRA */ \ 1+LINK_SIZE, /* SBRAPOS */ \ - 3+LINK_SIZE, /* SCBRA */ \ - 3+LINK_SIZE, /* SCBRAPOS */ \ + 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ + 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ 1+LINK_SIZE, /* SCOND */ \ - 3, 3, /* CREF, NCREF */ \ - 3, 3, /* RREF, NRREF */ \ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \ 1, /* DEF */ \ 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ - 3, 1 /* CLOSE, SKIPZERO */ + 1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" condition. 
*/ diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index c1d1140..5fed4a1 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -564,7 +564,7 @@ switch(*cc) case OP_NOTMINUPTOI: case OP_NOTEXACTI: case OP_NOTPOSUPTOI: - cc += 4; + cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UTF8 if (common->utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; #endif @@ -583,12 +583,12 @@ switch(*cc) case OP_RREF: case OP_NRREF: case OP_CLOSE: - cc += 3; + cc += 1 + IMM2_SIZE; return cc; case OP_CRRANGE: case OP_CRMINRANGE: - return cc + 5; + return cc + 1 + 2 * IMM2_SIZE; case OP_CLASS: case OP_NCLASS: @@ -624,7 +624,7 @@ switch(*cc) case OP_CBRAPOS: case OP_SCBRA: case OP_SCBRAPOS: - return cc + 1 + LINK_SIZE + 2; + return cc + 1 + LINK_SIZE + IMM2_SIZE; default: return NULL; @@ -657,7 +657,7 @@ while (cc < ccend) case OP_CBRAPOS: case OP_SCBRAPOS: localspace += sizeof(sljit_w); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -705,7 +705,7 @@ while (cc < ccend) case OP_SCBRAPOS: common->localptrs[cc - common->start] = localptr; localptr += sizeof(sljit_w); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -761,7 +761,7 @@ while (cc < ccend) case OP_SCBRA: case OP_SCBRAPOS: length += 3; - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; default: @@ -824,7 +824,7 @@ while (cc < ccend) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); stackpos += (int)sizeof(sljit_w); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; default: @@ -863,13 +863,13 @@ while (cc < ccend) case OP_CBRA: case OP_SCBRA: localsize++; - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: localsize += 2; - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -969,7 +969,7 @@ while (status != end) case OP_SCBRA: count = 1; srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); - cc += 1 + LINK_SIZE + 2; + cc 
+= 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: @@ -978,7 +978,7 @@ while (status != end) srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); srcw[0] = PRIV_DATA(cc); SLJIT_ASSERT(srcw[0] != 0); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -1427,8 +1427,8 @@ if (common->utf8) return; } #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); } @@ -1449,7 +1449,7 @@ if (common->utf8) return; } #endif -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue) @@ -3116,10 +3116,10 @@ switch(type) case OP_NOT: case OP_NOTI: - length = 1; #ifdef SUPPORT_UTF8 if (common->utf8) { + length = 1; if (*cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f]; check_input_end(common, fallbacks); @@ -3150,9 +3150,9 @@ switch(type) else #endif { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); c = *cc; } @@ -3173,7 +3173,7 @@ switch(type) add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); } } - return cc + length; + return cc + 1; case OP_CLASS: case OP_NCLASS: @@ -3403,7 +3403,7 @@ if (jump != NULL) else JUMPHERE(jump); } -return cc + 3; +return cc + 1 + IMM2_SIZE; } static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) @@ -3420,7 +3420,7 @@ BOOL minimize; 
PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL); -type = cc[3]; +type = cc[1 + IMM2_SIZE]; minimize = (type & 0x1) != 0; switch(type) { @@ -3428,25 +3428,25 @@ switch(type) case OP_CRMINSTAR: min = 0; max = 0; - cc += 4; + cc += 1 + IMM2_SIZE + 1; break; case OP_CRPLUS: case OP_CRMINPLUS: min = 1; max = 0; - cc += 4; + cc += 1 + IMM2_SIZE + 1; break; case OP_CRQUERY: case OP_CRMINQUERY: min = 0; max = 1; - cc += 4; + cc += 1 + IMM2_SIZE + 1; break; case OP_CRRANGE: case OP_CRMINRANGE: - min = GET2(cc, 3 + 1); - max = GET2(cc, 3 + 3); - cc += 8; + min = GET2(cc, 1 + IMM2_SIZE + 1); + max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); + cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; break; default: SLJIT_ASSERT_STOP(); @@ -4101,7 +4101,7 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) localptr = OVECTOR_PRIV(offset); offset <<= 1; FALLBACK_AS(bracket_fallback)->localptr = localptr; - hotpath += 2; + hotpath += IMM2_SIZE; } else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) { @@ -4265,7 +4265,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) SLJIT_ASSERT(has_alternatives); add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - hotpath += 3; + hotpath += 1 + IMM2_SIZE; } else if (*hotpath == OP_NCREF) { @@ -4284,7 +4284,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); JUMPHERE(jump); - hotpath += 3; + hotpath += 1 + IMM2_SIZE; } else if (*hotpath == OP_RREF || *hotpath == OP_NRREF) { @@ -4305,7 +4305,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) { SLJIT_ASSERT(!has_alternatives); if (stacksize != 0) - hotpath += 3; + hotpath += 1 + IMM2_SIZE; else { if (*cc == OP_ALT) @@ -4332,7 +4332,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, 
SLJIT_FUNC_OFFSET(do_searchgroups)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); - hotpath += 3; + hotpath += 1 + IMM2_SIZE; } } else @@ -4507,7 +4507,7 @@ switch(opcode) offset = GET2(cc, 1 + LINK_SIZE); cbraprivptr = OVECTOR_PRIV(offset); offset <<= 1; - ccbegin = cc + 1 + LINK_SIZE + 2; + ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; break; default: @@ -4736,7 +4736,7 @@ else else { SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE); - *arg1 = GET2(cc, (class_len + 2)); + *arg1 = GET2(cc, (class_len + IMM2_SIZE)); *arg2 = GET2(cc, class_len); if (*arg2 == 0) @@ -4748,7 +4748,7 @@ else *opcode = OP_EXACT; if (end != NULL) - *end = cc + class_len + 4; + *end = cc + class_len + 2 * IMM2_SIZE; } return cc; } @@ -4756,7 +4756,7 @@ else if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) { *arg1 = GET2(cc, 0); - cc += 2; + cc += IMM2_SIZE; } if (*type == 0) @@ -4998,13 +4998,13 @@ int offset = GET2(cc, 1); /* Data will be discarded anyway... */ if (common->currententry != NULL) - return cc + 3; + return cc + 1 + IMM2_SIZE; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset)); offset <<= 1; OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); -return cc + 3; +return cc + 1 + IMM2_SIZE; } static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent) @@ -5150,7 +5150,7 @@ while (cc < ccend) case OP_REF: case OP_REFI: - if (cc[3] >= OP_CRSTAR && cc[3] <= OP_CRMINRANGE) + if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) cc = compile_ref_iterator_hotpath(common, cc, parent); else cc = compile_ref_hotpath(common, cc, parent->top != NULL ? 
&parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE); @@ -5388,7 +5388,7 @@ DEFINE_COMPILER; pcre_uchar *cc = current->cc; pcre_uchar type; -type = cc[3]; +type = cc[1 + IMM2_SIZE]; if ((type & 0x1) == 0) { set_jumps(current->topfallbacks, LABEL()); @@ -6062,7 +6062,7 @@ static SLJIT_INLINE void compile_recurse(compiler_common *common) { DEFINE_COMPILER; pcre_uchar *cc = common->start + common->currententry->start; -pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : 2); +pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); pcre_uchar *ccend = bracketend(cc); int localsize = get_localsize(common, ccbegin, ccend); int framesize = get_framesize(common, cc, TRUE); diff --git a/pcre_printint.src b/pcre_printint.src index 41e3555..a5670e5 100644 --- a/pcre_printint.src +++ b/pcre_printint.src @@ -364,7 +364,7 @@ for(;;) case OP_MINUPTO: case OP_POSUPTO: fprintf(f, " %s ", flag); - extra = print_char(f, code+3, utf8); + extra = print_char(f, code + 1 + IMM2_SIZE, utf8); fprintf(f, "{"); if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); fprintf(f, "%d}", GET2(code,1)); @@ -376,10 +376,11 @@ for(;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - fprintf(f, " %s", OP_names[code[3]]); - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) + fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]); + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) { - fprintf(f, " %s ", get_ucpname(code[4], code[5])); + fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1], + code[1 + IMM2_SIZE + 2])); extra = 2; } fprintf(f, "{"); @@ -436,7 +437,7 @@ for(;;) case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTPOSUPTO: - c = code[3]; + c = code[1 + IMM2_SIZE]; if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c); else fprintf(f, " %s [^\\x%02x]{", flag, c); if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); @@ -576,7 +577,7 @@ for(;;) case OP_CRRANGE: case OP_CRMINRANGE: min = GET2(ccode,1); 
- max = GET2(ccode,3); + max = GET2(ccode,1 + IMM2_SIZE); if (max == 0) fprintf(f, "{%d,}", min); else fprintf(f, "{%d,%d}", min, max); if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); diff --git a/pcre_study.c b/pcre_study.c index 07e548a..5253c49 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -88,7 +88,7 @@ register int branchlength = 0; register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2; + *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; /* Scan along the opcodes for this branch. If we get to the end of the branch, check the length against that of the other branches. */ @@ -243,7 +243,7 @@ for (;;) case OP_NOTEXACT: case OP_NOTEXACTI: branchlength += GET2(cc,1); - cc += 4; + cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UTF8 if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; #endif @@ -251,7 +251,8 @@ for (;;) case OP_TYPEEXACT: branchlength += GET2(cc,1); - cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4; + cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP + || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 
2 : 0); break; /* Handle single-char non-literal matchers */ @@ -314,7 +315,8 @@ for (;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; + if (cc[1 + IMM2_SIZE] == OP_PROP + || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; cc += PRIV(OP_lengths)[op]; break; @@ -347,7 +349,7 @@ for (;;) case OP_CRRANGE: case OP_CRMINRANGE: branchlength += GET2(cc,1); - cc += 5; + cc += 1 + 2 * IMM2_SIZE; break; default: @@ -386,7 +388,7 @@ for (;;) } } else d = 0; - cc += 3; + cc += 1 + IMM2_SIZE; /* Handle repeated back references */ @@ -409,7 +411,7 @@ for (;;) case OP_CRRANGE: case OP_CRMINRANGE: min = GET2(cc, 1); - cc += 5; + cc += 1 + 2 * IMM2_SIZE; break; default: @@ -701,7 +703,7 @@ do const pcre_uchar *tcode = code + 1 + LINK_SIZE; if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2; + *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE; while (try_next) /* Loop for items in this branch */ { @@ -904,19 +906,19 @@ do case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: - tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8); + tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf8); break; case OP_UPTOI: case OP_MINUPTOI: case OP_POSUPTOI: - tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8); + tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf8); break; /* At least one single char sets the bit and stops */ case OP_EXACT: - tcode += 2; + tcode += IMM2_SIZE; /* Fall through */ case OP_CHAR: case OP_PLUS: @@ -927,7 +929,7 @@ do break; case OP_EXACTI: - tcode += 2; + tcode += IMM2_SIZE; /* Fall through */ case OP_CHARI: case OP_PLUSI: @@ -1026,7 +1028,7 @@ do break; case OP_TYPEEXACT: - tcode += 3; + tcode += 1 + IMM2_SIZE; break; /* Zero or more repeats of character types set the bits and then @@ -1035,7 +1037,7 @@ do case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - tcode += 2; /* Fall 
through */ + tcode += IMM2_SIZE; /* Fall through */ case OP_TYPESTAR: case OP_TYPEMINSTAR: @@ -1178,7 +1180,7 @@ do case OP_CRRANGE: case OP_CRMINRANGE: - if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; + if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; else try_next = FALSE; break; |