summary | refs | log | tree | commit | diff
path: root/pcre_study.c
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-05-25 08:29:03 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-05-25 08:29:03 +0000
commit: f14b12f846d24c0199cc73b40393ec704e419c42 (patch)
tree: 55ee08ccaf34c96aa9bd295136ab6b8969c5ad44 /pcre_study.c
parent: 1b908148ffbe4b5ce256853ce46ad1b5954ec738 (diff)
download: pcre-f14b12f846d24c0199cc73b40393ec704e419c42.tar.gz
Remove OP_OPT by handling /i and /m entirely at compile time. Fixes a bug with
patterns like /(?i:([^b]))(?1)/, where the /i option was mishandled.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@602 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_study.c')
-rw-r--r-- pcre_study.c 71
1 file changed, 58 insertions, 13 deletions
diff --git a/pcre_study.c b/pcre_study.c
index be321fa..22dc4e3 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -160,14 +160,15 @@ for (;;)
case OP_RREF:
case OP_NRREF:
case OP_DEF:
- case OP_OPT:
case OP_CALLOUT:
case OP_SOD:
case OP_SOM:
case OP_EOD:
case OP_EODN:
case OP_CIRC:
+ case OP_CIRCM:
case OP_DOLL:
+ case OP_DOLLM:
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
cc += _pcre_OP_lengths[*cc];
@@ -186,8 +187,9 @@ for (;;)
/* Handle literal characters and + repetitions */
case OP_CHAR:
- case OP_CHARNC:
+ case OP_CHARI:
case OP_NOT:
+ case OP_NOTI:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
@@ -337,6 +339,7 @@ for (;;)
that case we must set the minimum length to zero. */
case OP_REF:
+ case OP_REFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
@@ -391,23 +394,50 @@ for (;;)
/* Anything else does not or need not match a character. We can get the
item's length from the table, but for those that can match zero occurrences
- of a character, we must take special action for UTF-8 characters. */
+ of a character, we must take special action for UTF-8 characters. As it
+ happens, the "NOT" versions of these opcodes are used at present only for
+ ASCII characters, so they could be omitted from this list. However, in
+ future that may change, so we leave them in this special case. */
case OP_UPTO:
+ case OP_UPTOI:
case OP_NOTUPTO:
+ case OP_NOTUPTOI:
case OP_MINUPTO:
+ case OP_MINUPTOI:
case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
case OP_POSUPTO:
+ case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+
case OP_STAR:
+ case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
case OP_MINSTAR:
+ case OP_MINSTARI:
case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
case OP_POSSTAR:
+ case OP_POSSTARI:
case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+
case OP_QUERY:
+ case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
case OP_MINQUERY:
+ case OP_MINQUERYI:
case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
case OP_POSQUERY:
+ case OP_POSQUERYI:
case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+
cc += _pcre_OP_lengths[op];
#ifdef SUPPORT_UTF8
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
@@ -679,13 +709,6 @@ do
tcode += 1 + LINK_SIZE;
break;
- /* Skip over an option setting, changing the caseless flag */
-
- case OP_OPT:
- caseless = (tcode[1] & PCRE_CASELESS) != 0;
- tcode += 2;
- break;
-
/* BRAZERO does the bracket, but carries on. */
case OP_BRAZERO:
@@ -720,6 +743,15 @@ do
tcode = set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
break;
+ case OP_STARI:
+ case OP_MINSTARI:
+ case OP_POSSTARI:
+ case OP_QUERYI:
+ case OP_MINQUERYI:
+ case OP_POSQUERYI:
+ tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
+ break;
+
/* Single-char upto sets the bit and tries the next */
case OP_UPTO:
@@ -728,13 +760,18 @@ do
tcode = set_table_bit(start_bits, tcode + 3, caseless, cd, utf8);
break;
+ case OP_UPTOI:
+ case OP_MINUPTOI:
+ case OP_POSUPTOI:
+ tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);
+ break;
+
/* At least one single char sets the bit and stops */
- case OP_EXACT: /* Fall through */
+ case OP_EXACT:
tcode += 2;
-
+ /* Fall through */
case OP_CHAR:
- case OP_CHARNC:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
@@ -742,6 +779,14 @@ do
try_next = FALSE;
break;
+ case OP_CHARI:
+ case OP_PLUSI:
+ case OP_MINPLUSI:
+ case OP_POSPLUSI:
+ (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
+ try_next = FALSE;
+ break;
+
/* Special spacing and line-terminating items. These recognize specific
lists of characters. The difference between VSPACE and ANYNL is that the
latter can match the two-character CRLF sequence, but that is not