summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_valid_utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_valid_utf8.c')
-rw-r--r--ext/pcre/pcrelib/pcre_valid_utf8.c19
1 files changed, 17 insertions, 2 deletions
diff --git a/ext/pcre/pcrelib/pcre_valid_utf8.c b/ext/pcre/pcrelib/pcre_valid_utf8.c
index f49832dc5b..a415927568 100644
--- a/ext/pcre/pcrelib/pcre_valid_utf8.c
+++ b/ext/pcre/pcrelib/pcre_valid_utf8.c
@@ -42,7 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
strings. */
+#ifdef HAVE_CONFIG_H
#include "config.h"
+#endif
#include "pcre_internal.h"
@@ -90,6 +92,7 @@ PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
+PCRE_UTF8_ERR22 Non-character
Arguments:
string points to the string
@@ -114,7 +117,8 @@ if (length < 0)
for (p = string; length-- > 0; p++)
{
- register int ab, c, d;
+ register pcre_uchar ab, c, d;
+ pcre_uint32 v = 0;
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -183,6 +187,7 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
+ v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -210,6 +215,7 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
+ v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -284,11 +290,20 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
+
+ /* Reject non-characters. The pointer p is currently at the last byte of the
+ character. */
+ if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
+ {
+ *erroroffset = (int)(p - string) - ab;
+ return PCRE_UTF8_ERR22;
+ }
}
-#else /* SUPPORT_UTF */
+#else /* Not SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
+(void)(erroroffset);
#endif
return PCRE_UTF8_ERR0; /* This indicates success */