diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-02-22 14:24:56 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-02-22 14:24:56 +0000 |
commit | a4ebdbff62929ee62185d2f17e93b9d838645604 (patch) | |
tree | aca91e9db6688a8cf89f42dc677004d9181305d4 /pcre_dfa_exec.c | |
parent | 5f83aeee3273dc59fa8878eb3e830e64213fcbd2 (diff) | |
download | pcre-a4ebdbff62929ee62185d2f17e93b9d838645604.tar.gz |
Upgrade DFA support to handle OP_NOTI with multibyte characters (other related
opcodes were OK).
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@925 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_dfa_exec.c')
-rw-r--r-- | pcre_dfa_exec.c | 34 |
1 files changed, 22 insertions, 12 deletions
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 4f950ec..c1c0050 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -695,10 +695,10 @@ for (;;) permitted. We also use this mechanism for opcodes such as OP_TYPEPLUS that take an - argument that is not a data character - but is always one byte long. We - have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in - this case. To keep the other cases fast, convert these ones to new opcodes. - */ + argument that is not a data character - but is always one byte long because + the values are small. We have to take special action to deal with \P, \p, + \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert + these ones to new opcodes. */ if (coptable[codevalue] > 0) { @@ -2266,22 +2266,32 @@ for (;;) break; /*-----------------------------------------------------------------*/ - /* Match a negated single character casefully. This is only used for - one-byte characters, that is, we know that d < 256. The character we are - checking (c) can be multibyte. */ + /* Match a negated single character casefully. */ case OP_NOT: if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } break; /*-----------------------------------------------------------------*/ - /* Match a negated single character caselessly. This is only used for - one-byte characters, that is, we know that d < 256. The character we are - checking (c) can be multibyte. */ + /* Match a negated single character caselessly. */ case OP_NOTI: - if (clen > 0 && c != d && c != fcc[d]) - { ADD_NEW(state_offset + dlen + 1, 0); } + if (clen > 0) + { + unsigned int otherd; +#ifdef SUPPORT_UTF + if (utf && d >= 128) + { +#ifdef SUPPORT_UCP + otherd = UCD_OTHERCASE(d); +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); + if (c != d && c != otherd) + { ADD_NEW(state_offset + dlen + 1, 0); } + } break; /*-----------------------------------------------------------------*/ |