summary | refs | log | tree | commit | diff
path: root/pcre_valid_utf8.c
diff options
context:
space:
mode:
author chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-10-16 15:56:18 +0000
committer chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-10-16 15:56:18 +0000
commit 079e382d01f40c050c4ca2d6d43dddb097a5c08e (patch)
tree 3104eeb64264bd5bc2b5eae2c330e15622f645ca /pcre_valid_utf8.c
parent cd603468f19fd836eba261890ee2329413e86ac0 (diff)
download pcre-079e382d01f40c050c4ca2d6d43dddb097a5c08e.tar.gz
pcre32: utf: Reject all non-characters and not just 0xfffe
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1098 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_valid_utf8.c')
-rw-r--r-- pcre_valid_utf8.c 14
1 files changed, 13 insertions, 1 deletions
diff --git a/pcre_valid_utf8.c b/pcre_valid_utf8.c
index d1e8a55..a415927 100644
--- a/pcre_valid_utf8.c
+++ b/pcre_valid_utf8.c
@@ -92,6 +92,7 @@ PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
+PCRE_UTF8_ERR22 Non-character
Arguments:
string points to the string
@@ -116,7 +117,8 @@ if (length < 0)
for (p = string; length-- > 0; p++)
{
- register int ab, c, d;
+ register pcre_uchar ab, c, d;
+ pcre_uint32 v = 0;
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -185,6 +187,7 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
+ v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -212,6 +215,7 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
+ v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -286,6 +290,14 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
+
+ /* Reject non-characters. The pointer p is currently at the last byte of the
+ character. */
+ if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
+ {
+ *erroroffset = (int)(p - string) - ab;
+ return PCRE_UTF8_ERR22;
+ }
}
#else /* Not SUPPORT_UTF */