summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 2
-rw-r--r-- pcre_dfa_exec.c 34
-rw-r--r-- testdata/testinput10 8
-rw-r--r-- testdata/testinput9 6
-rw-r--r-- testdata/testoutput10 11
-rw-r--r-- testdata/testoutput9 9
6 files changed, 57 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 296e1ef..caf51bd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -42,7 +42,7 @@ Version 8.31
7. Individual JIT compile options can be set in pcretest by following -s+[+]
or /S+[+] with a digit between 1 and 7.
-8. OP_NOT now supports any UTF character not just single character ones.
+8. OP_NOT now supports any UTF character not just single-byte ones.
Version 8.30 04-February-2012
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 4f950ec..c1c0050 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -695,10 +695,10 @@ for (;;)
permitted.
We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
- argument that is not a data character - but is always one byte long. We
- have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
- this case. To keep the other cases fast, convert these ones to new opcodes.
- */
+ argument that is not a data character - but is always one byte long because
+ the values are small. We have to take special action to deal with \P, \p,
+ \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
+ these ones to new opcodes. */
if (coptable[codevalue] > 0)
{
@@ -2266,22 +2266,32 @@ for (;;)
break;
/*-----------------------------------------------------------------*/
- /* Match a negated single character casefully. This is only used for
- one-byte characters, that is, we know that d < 256. The character we are
- checking (c) can be multibyte. */
+ /* Match a negated single character casefully. */
case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
/*-----------------------------------------------------------------*/
- /* Match a negated single character caselessly. This is only used for
- one-byte characters, that is, we know that d < 256. The character we are
- checking (c) can be multibyte. */
+ /* Match a negated single character caselessly. */
case OP_NOTI:
- if (clen > 0 && c != d && c != fcc[d])
- { ADD_NEW(state_offset + dlen + 1, 0); }
+ if (clen > 0)
+ {
+ unsigned int otherd;
+#ifdef SUPPORT_UTF
+ if (utf && d >= 128)
+ {
+#ifdef SUPPORT_UCP
+ otherd = UCD_OTHERCASE(d);
+#endif /* SUPPORT_UCP */
+ }
+ else
+#endif /* SUPPORT_UTF */
+ otherd = TABLE_GET(d, fcc, d);
+ if (c != d && c != otherd)
+ { ADD_NEW(state_offset + dlen + 1, 0); }
+ }
break;
/*-----------------------------------------------------------------*/
diff --git a/testdata/testinput10 b/testdata/testinput10
index 7b85f4d..6cdcc41 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -985,5 +985,13 @@
abc_
!\x{c0}++\x{c1}\x{c2}
!\x{c0}+++++
+
+/-- Caseless single negated characters > 127 need UCP support --/
+
+/[^\x{100}]/8i
+ \x{100}\x{101}X
+
+/[^\x{100}]+/8i
+ \x{100}\x{101}XX
/-- End of testinput10 --/
diff --git a/testdata/testinput9 b/testdata/testinput9
index 86fcbda..d9a1d9d 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -740,4 +740,10 @@
\r\r\r\P
\r\r\r\P\P
+/[^\x{100}]/8
+ \x{100}\x{101}X
+
+/[^\x{100}]+/8
+ \x{100}\x{101}X
+
/-- End of testinput9 --/
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index ca181db..cc94a34 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -2033,5 +2033,16 @@ No match
0: ++\xc1
!\x{c0}+++++
0: \xc0++
+
+/-- Caseless single negated characters > 127 need UCP support --/
+
+/[^\x{100}]/8i
+ \x{100}\x{101}X
+ 0: X
+
+/[^\x{100}]+/8i
+ \x{100}\x{101}XX
+ 0: XX
+ 1: X
/-- End of testinput10 --/
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 26ca40f..548c341 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -1414,4 +1414,13 @@ Partial match: \x{0d}\x{0d}
\r\r\r\P\P
Partial match: \x{0d}\x{0d}\x{0d}
+/[^\x{100}]/8
+ \x{100}\x{101}X
+ 0: \x{101}
+
+/[^\x{100}]+/8
+ \x{100}\x{101}X
+ 0: \x{101}X
+ 1: \x{101}
+
/-- End of testinput9 --/