summary | refs | log | tree | commit | diff
path: root/pcre_dfa_exec.c
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-02-22 14:24:56 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-02-22 14:24:56 +0000
commit: a4ebdbff62929ee62185d2f17e93b9d838645604 (patch)
tree: aca91e9db6688a8cf89f42dc677004d9181305d4 /pcre_dfa_exec.c
parent: 5f83aeee3273dc59fa8878eb3e830e64213fcbd2 (diff)
download: pcre-a4ebdbff62929ee62185d2f17e93b9d838645604.tar.gz
Upgrade DFA support to handle OP_NOTI with multibyte characters (other related
opcodes were OK).

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@925 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_dfa_exec.c')
-rw-r--r-- pcre_dfa_exec.c | 34
1 file changed, 22 insertions, 12 deletions
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 4f950ec..c1c0050 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -695,10 +695,10 @@ for (;;)
permitted.
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
- argument that is not a data character - but is always one byte long. We
- have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
- this case. To keep the other cases fast, convert these ones to new opcodes.
- */
+ argument that is not a data character - but is always one byte long because
+ the values are small. We have to take special action to deal with \P, \p,
+ \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
+ these ones to new opcodes. */
if (coptable[codevalue] > 0)
{
@@ -2266,22 +2266,32 @@ for (;;)
break;
/*-----------------------------------------------------------------*/
- /* Match a negated single character casefully. This is only used for
- one-byte characters, that is, we know that d < 256. The character we are
- checking (c) can be multibyte. */
+ /* Match a negated single character casefully. */
case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
/*-----------------------------------------------------------------*/
- /* Match a negated single character caselessly. This is only used for
- one-byte characters, that is, we know that d < 256. The character we are
- checking (c) can be multibyte. */
+ /* Match a negated single character caselessly. */
case OP_NOTI:
- if (clen > 0 && c != d && c != fcc[d])
- { ADD_NEW(state_offset + dlen + 1, 0); }
+ if (clen > 0)
+ {
+ unsigned int otherd;
+#ifdef SUPPORT_UTF
+ if (utf && d >= 128)
+ {
+#ifdef SUPPORT_UCP
+ otherd = UCD_OTHERCASE(d);
+#endif /* SUPPORT_UCP */
+ }
+ else
+#endif /* SUPPORT_UTF */
+ otherd = TABLE_GET(d, fcc, d);
+ if (c != d && c != otherd)
+ { ADD_NEW(state_offset + dlen + 1, 0); }
+ }
break;
/*-----------------------------------------------------------------*/