summaryrefslogtreecommitdiff
path: root/pcre_study.c
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-05-27 10:14:09 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-05-27 10:14:09 +0000
commitadf2e795979e12190b8631f1360e941c4efb104d (patch)
treed3264e06020fd9183ed777a7a1d748d117a62cb2 /pcre_study.c
parentf14b12f846d24c0199cc73b40393ec704e419c42 (diff)
downloadpcre-adf2e795979e12190b8631f1360e941c4efb104d.tar.gz
Fixed some omissions in pcre_study() for the new caseless opcodes.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@603 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_study.c')
-rw-r--r--pcre_study.c59
1 files changed, 43 insertions, 16 deletions
diff --git a/pcre_study.c b/pcre_study.c
index 22dc4e3..5316a0f 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -73,6 +73,7 @@ Arguments:
Returns: the minimum length
-1 if \C was encountered
-2 internal error (missing capturing bracket)
+ -3 internal error (opcode not listed)
*/
static int
@@ -191,11 +192,17 @@ for (;;)
case OP_NOT:
case OP_NOTI:
case OP_PLUS:
+ case OP_PLUSI:
case OP_MINPLUS:
+ case OP_MINPLUSI:
case OP_POSPLUS:
+ case OP_POSPLUSI:
case OP_NOTPLUS:
+ case OP_NOTPLUSI:
case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
branchlength++;
cc += 2;
#ifdef SUPPORT_UTF8
@@ -214,7 +221,9 @@ for (;;)
need to skip over a multibyte character in UTF8 mode. */
case OP_EXACT:
+ case OP_EXACTI:
case OP_NOTEXACT:
+ case OP_NOTEXACTI:
branchlength += GET2(cc,1);
cc += 4;
#ifdef SUPPORT_UTF8
@@ -397,7 +406,8 @@ for (;;)
of a character, we must take special action for UTF-8 characters. As it
happens, the "NOT" versions of these opcodes are used at present only for
ASCII characters, so they could be omitted from this list. However, in
- future that may change, so we leave them in this special case. */
+ future that may change, so we include them here so as not to leave a
+ gotcha for a future maintainer. */
case OP_UPTO:
case OP_UPTOI:
@@ -456,13 +466,24 @@ for (;;)
cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];
break;
- /* For the record, these are the opcodes that are matched by "default":
- OP_ACCEPT, OP_CLOSE, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_SET_SOM, OP_SKIP,
- OP_THEN. */
+ /* The remaining opcodes are just skipped over. */
- default:
+ case OP_ACCEPT:
+ case OP_CLOSE:
+ case OP_COMMIT:
+ case OP_FAIL:
+ case OP_PRUNE:
+ case OP_SET_SOM:
+ case OP_SKIP:
+ case OP_THEN:
cc += _pcre_OP_lengths[op];
break;
+
+ /* This should not occur: we list all opcodes explicitly so that when
+ new ones get added they are properly considered. */
+
+ default:
+ return -3;
}
}
/* Control never gets here */
@@ -608,7 +629,6 @@ function fails unless the result is SSB_DONE.
Arguments:
code points to an expression
start_bits points to a 32-byte table, initialized to 0
- caseless the current state of the caseless flag
utf8 TRUE if in UTF-8 mode
cd the block with char table pointers
@@ -618,8 +638,8 @@ Returns: SSB_FAIL => Failed to find any starting bytes
*/
static int
-set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
- BOOL utf8, compile_data *cd)
+set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,
+ compile_data *cd)
{
register int c;
int yield = SSB_DONE;
@@ -668,7 +688,7 @@ do
case OP_SCBRA:
case OP_ONCE:
case OP_ASSERT:
- rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
+ rc = set_start_bits(tcode, start_bits, utf8, cd);
if (rc == SSB_FAIL) return SSB_FAIL;
if (rc == SSB_DONE) try_next = FALSE; else
{
@@ -713,7 +733,7 @@ do
case OP_BRAZERO:
case OP_BRAMINZERO:
- if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
+ if (set_start_bits(++tcode, start_bits, utf8, cd) == SSB_FAIL)
return SSB_FAIL;
/* =========================================================================
See the comment at the head of this function concerning the next line,
@@ -740,7 +760,7 @@ do
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
- tcode = set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
+ tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
break;
case OP_STARI:
@@ -757,7 +777,7 @@ do
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
- tcode = set_table_bit(start_bits, tcode + 3, caseless, cd, utf8);
+ tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);
break;
case OP_UPTOI:
@@ -775,10 +795,13 @@ do
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
- (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
+ (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
try_next = FALSE;
break;
+ case OP_EXACTI:
+ tcode += 2;
+ /* Fall through */
case OP_CHARI:
case OP_PLUSI:
case OP_MINPLUSI:
@@ -1120,14 +1143,18 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
/* See if we can find a fixed set of initial characters for the pattern. */
memset(start_bits, 0, 32 * sizeof(uschar));
- bits_set = set_start_bits(code, start_bits,
- (re->options & PCRE_CASELESS) != 0, (re->options & PCRE_UTF8) != 0,
+ bits_set = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
&compile_block) == SSB_DONE;
}
/* Find the minimum length of subject string. */
-min = find_minlength(code, code, re->options);
+switch(min = find_minlength(code, code, re->options))
+ {
+ case -2: *errorptr = "internal error: missing capturing bracket"; break;
+ case -3: *errorptr = "internal error: opcode not recognized"; break;
+ default: break;
+ }
/* Return NULL if no optimization is possible. */