summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- pcre_compile.c 13
-rw-r--r-- pcre_internal.h 2
-rw-r--r-- pcreposix.c 1
-rw-r--r-- testdata/testinput18 2
-rw-r--r-- testdata/testoutput18 3
-rw-r--r-- testdata/testoutput2 2
-rw-r--r-- testdata/testoutput5 10
7 files changed, 22 insertions, 11 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 2b076cd..7da62e4 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -438,7 +438,7 @@ static const char error_texts[] =
/* 30 */
"unknown POSIX class name\0"
"POSIX collating elements are not supported\0"
- "this version of PCRE is not compiled with PCRE_UTF8 support\0"
+ "this version of PCRE is compiled without UTF support\0"
"spare error\0" /** DEAD **/
"character value in \\x{...} sequence is too large\0"
/* 35 */
@@ -461,7 +461,7 @@ static const char error_texts[] =
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
/* 50 */
"repeated subpattern is too long\0" /** DEAD **/
- "octal value is greater than \\377 (not in UTF-8 mode)\0"
+ "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
"internal error: overran compiling workspace\0"
"internal error: previously-checked referenced subpattern not found\0"
"DEFINE group contains more than one branch\0"
@@ -480,14 +480,15 @@ static const char error_texts[] =
/* 65 */
"different names for subpatterns of the same number are not allowed\0"
"(*MARK) must have an argument\0"
- "this version of PCRE is not compiled with PCRE_UCP support\0"
+ "this version of PCRE is not compiled with Unicode property support\0"
"\\c must be followed by an ASCII character\0"
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
"internal error: unknown opcode in find_fixedlength()\0"
"\\N is not supported in a class\0"
"too many forward references\0"
- "disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff)\0"
+ "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
+ "invalid UTF-16 string\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -7706,7 +7707,11 @@ not used here. */
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
(errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
{
+#ifdef COMPILE_PCRE8
errorcode = ERR44;
+#else
+ errorcode = ERR74;
+#endif
goto PCRE_EARLY_ERROR_RETURN2;
}
#else
diff --git a/pcre_internal.h b/pcre_internal.h
index 837c50c..d949575 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1939,7 +1939,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
diff --git a/pcreposix.c b/pcreposix.c
index 6929fc0..06cdd01 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -158,6 +158,7 @@ static const int eint[] = {
REG_BADPAT, /* \N is not supported in a class */
REG_BADPAT, /* too many forward references */
REG_BADPAT, /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
+ REG_BADPAT /* invalid UTF-16 string (should not occur) */
};
/* Table of texts corresponding to POSIX error codes */
diff --git a/testdata/testinput18 b/testdata/testinput18
index f075d8a..f63ed89 100644
--- a/testdata/testinput18
+++ b/testdata/testinput18
@@ -244,4 +244,6 @@ correctly, but that messes up comparisons). --/
<!testsaved16BE-2
+/νΌ€/8
+
/-- End of testinput18 --/
diff --git a/testdata/testoutput18 b/testdata/testoutput18
index f1013eb..f3bcfbd 100644
--- a/testdata/testoutput18
+++ b/testdata/testoutput18
@@ -899,4 +899,7 @@ No need char
Subject length lower bound = 2
No set of starting bytes
+/νΌ€/8
+Failed: invalid UTF-16 string at offset 0
+
/-- End of testinput18 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 9f5134d..3bbf5b3 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -6366,7 +6366,7 @@ No need char
10: Y
/\777/I
-Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3
+Failed: octal value is greater than \377 in 8-bit non-UTF-8 mode at offset 3
/\s*,\s*/IS
Capturing subpattern count = 0
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index ab56060..b13b789 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -12,10 +12,10 @@ Failed: character value in \x{...} sequence is too large at offset 11
Failed: character value in \x{...} sequence is too large at offset 12
/\x{d800}/8
-Failed: disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) at offset 7
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
/\x{dfff}/8
-Failed: disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) at offset 7
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
/\x{d7ff}/8
@@ -483,7 +483,7 @@ No first char
No need char
/\777/I
-Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3
+Failed: octal value is greater than \377 in 8-bit non-UTF-8 mode at offset 3
/\x{100}*\d/8DZ
------------------------------------------------------------------
@@ -1561,10 +1561,10 @@ Partial match: for
/\x{d7ff}\x{e000}/8
/\x{d800}/8
-Failed: disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) at offset 7
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
/\x{dfff}/8
-Failed: disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) at offset 7
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
/\h+/8
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}