diff options
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 34 | ||||
-rw-r--r-- | testdata/testinput10 | 8 | ||||
-rw-r--r-- | testdata/testinput9 | 6 | ||||
-rw-r--r-- | testdata/testoutput10 | 11 | ||||
-rw-r--r-- | testdata/testoutput9 | 9 |
6 files changed, 57 insertions, 13 deletions
@@ -42,7 +42,7 @@ Version 8.31 7. Individual JIT compile options can be set in pcretest by following -s+[+] or /S+[+] with a digit between 1 and 7. -8. OP_NOT now supports any UTF character not just single character ones. +8. OP_NOT now supports any UTF character not just single-byte ones. Version 8.30 04-February-2012 diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 4f950ec..c1c0050 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -695,10 +695,10 @@ for (;;) permitted. We also use this mechanism for opcodes such as OP_TYPEPLUS that take an - argument that is not a data character - but is always one byte long. We - have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in - this case. To keep the other cases fast, convert these ones to new opcodes. - */ + argument that is not a data character - but is always one byte long because + the values are small. We have to take special action to deal with \P, \p, + \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert + these ones to new opcodes. */ if (coptable[codevalue] > 0) { @@ -2266,22 +2266,32 @@ for (;;) break; /*-----------------------------------------------------------------*/ - /* Match a negated single character casefully. This is only used for - one-byte characters, that is, we know that d < 256. The character we are - checking (c) can be multibyte. */ + /* Match a negated single character casefully. */ case OP_NOT: if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } break; /*-----------------------------------------------------------------*/ - /* Match a negated single character caselessly. This is only used for - one-byte characters, that is, we know that d < 256. The character we are - checking (c) can be multibyte. */ + /* Match a negated single character caselessly. 
*/ case OP_NOTI: - if (clen > 0 && c != d && c != fcc[d]) - { ADD_NEW(state_offset + dlen + 1, 0); } + if (clen > 0) + { + unsigned int otherd = d; /* default when no other-case lookup runs (UTF, d >= 128, no UCP): avoids reading an uninitialized value */ +#ifdef SUPPORT_UTF + if (utf && d >= 128) + { +#ifdef SUPPORT_UCP + otherd = UCD_OTHERCASE(d); +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); + if (c != d && c != otherd) + { ADD_NEW(state_offset + dlen + 1, 0); } + } break; /*-----------------------------------------------------------------*/ diff --git a/testdata/testinput10 b/testdata/testinput10 index 7b85f4d..6cdcc41 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -985,5 +985,13 @@ abc_ !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++ + +/-- Caseless single negated characters > 127 need UCP support --/ + +/[^\x{100}]/8i + \x{100}\x{101}X + +/[^\x{100}]+/8i + \x{100}\x{101}XX /-- End of testinput10 --/ diff --git a/testdata/testinput9 b/testdata/testinput9 index 86fcbda..d9a1d9d 100644 --- a/testdata/testinput9 +++ b/testdata/testinput9 @@ -740,4 +740,10 @@ \r\r\r\P \r\r\r\P\P +/[^\x{100}]/8 + \x{100}\x{101}X + +/[^\x{100}]+/8 + \x{100}\x{101}X + /-- End of testinput9 --/ diff --git a/testdata/testoutput10 b/testdata/testoutput10 index ca181db..cc94a34 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -2033,5 +2033,16 @@ No match 0: ++\xc1 !\x{c0}+++++ 0: \xc0++ + +/-- Caseless single negated characters > 127 need UCP support --/ + +/[^\x{100}]/8i + \x{100}\x{101}X + 0: X + +/[^\x{100}]+/8i + \x{100}\x{101}XX + 0: XX + 1: X /-- End of testinput10 --/ diff --git a/testdata/testoutput9 b/testdata/testoutput9 index 26ca40f..548c341 100644 --- a/testdata/testoutput9 +++ b/testdata/testoutput9 @@ -1414,4 +1414,13 @@ Partial match: \x{0d}\x{0d} \r\r\r\P\P Partial match: \x{0d}\x{0d}\x{0d} +/[^\x{100}]/8 + \x{100}\x{101}X + 0: \x{101} + +/[^\x{100}]+/8 + \x{100}\x{101}X + 0: \x{101}X + 1: \x{101} + /-- End of testinput9 --/ |