summary | refs | log | tree | commit | diff
path: root/pcre16_valid_utf16.c
diff options
context:
space:
mode:
author: chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-10-16 15:56:18 +0000
committer: chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-10-16 15:56:18 +0000
commit: 079e382d01f40c050c4ca2d6d43dddb097a5c08e (patch)
tree: 3104eeb64264bd5bc2b5eae2c330e15622f645ca /pcre16_valid_utf16.c
parent: cd603468f19fd836eba261890ee2329413e86ac0 (diff)
download: pcre-079e382d01f40c050c4ca2d6d43dddb097a5c08e.tar.gz
pcre32: utf: Reject all non-characters and not just 0xfffe
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1098 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre16_valid_utf16.c')
-rw-r--r-- pcre16_valid_utf16.c 19
1 files changed, 14 insertions, 5 deletions
diff --git a/pcre16_valid_utf16.c b/pcre16_valid_utf16.c
index 8047d23..6e7855d 100644
--- a/pcre16_valid_utf16.c
+++ b/pcre16_valid_utf16.c
@@ -69,7 +69,7 @@ PCRE_UTF16_ERR0 No error
PCRE_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE_UTF16_ERR2 Invalid low surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
-PCRE_UTF16_ERR4 Not allowed character
+PCRE_UTF16_ERR4 Non-character
Arguments:
string points to the string
@@ -85,7 +85,7 @@ PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
-register pcre_uchar c;
+register pcre_uint32 c;
if (length < 0)
{
@@ -101,9 +101,8 @@ for (p = string; length-- > 0; p++)
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
- /* This is probably a BOM from a different byte-order.
- Regardless, the string is rejected. */
- if (c == 0xfffe)
+ /* Check for non-characters */
+ if ((c & 0xfffeu) == 0xfffeu || c >= 0xfdd0u && c <= 0xfdefu)
{
*erroroffset = p - string;
return PCRE_UTF16_ERR4;
@@ -126,6 +125,16 @@ for (p = string; length-- > 0; p++)
*erroroffset = p - string;
return PCRE_UTF16_ERR2;
}
+ else
+ {
+ /* Valid surrogate, but check for non-characters */
+ c = (((c & 0x3ffu) << 10) | (*p & 0x3ffu)) + 0x10000u;
+ if ((c & 0xfffeu) == 0xfffeu)
+ {
+ *erroroffset = p - string;
+ return PCRE_UTF16_ERR4;
+ }
+ }
}
else
{