diff options
author | chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-10-16 15:56:18 +0000 |
---|---|---|
committer | chpe <chpe@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-10-16 15:56:18 +0000 |
commit | 079e382d01f40c050c4ca2d6d43dddb097a5c08e (patch) | |
tree | 3104eeb64264bd5bc2b5eae2c330e15622f645ca /pcre_valid_utf8.c | |
parent | cd603468f19fd836eba261890ee2329413e86ac0 (diff) | |
download | pcre-079e382d01f40c050c4ca2d6d43dddb097a5c08e.tar.gz |
pcre32: utf: Reject all non-characters and not just 0xfffe
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1098 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_valid_utf8.c')
-rw-r--r-- | pcre_valid_utf8.c | 14 |
1 file changed, 13 insertions, 1 deletion
```diff
diff --git a/pcre_valid_utf8.c b/pcre_valid_utf8.c
index d1e8a55..a415927 100644
--- a/pcre_valid_utf8.c
+++ b/pcre_valid_utf8.c
@@ -92,6 +92,7 @@ PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
 PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
 PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
 PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
+PCRE_UTF8_ERR22 Non-character
 
 Arguments:
   string points to the string
@@ -116,7 +117,8 @@ if (length < 0)
 
 for (p = string; length-- > 0; p++)
   {
-  register int ab, c, d;
+  register pcre_uchar ab, c, d;
+  pcre_uint32 v = 0;
 
   c = *p;
   if (c < 128) continue; /* ASCII character */
@@ -185,6 +187,7 @@ for (p = string; length-- > 0; p++)
       *erroroffset = (int)(p - string) - 2;
       return PCRE_UTF8_ERR14;
       }
+    v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
     break;
 
     /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -212,6 +215,7 @@ for (p = string; length-- > 0; p++)
       *erroroffset = (int)(p - string) - 3;
       return PCRE_UTF8_ERR13;
       }
+    v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
     break;
 
     /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -286,6 +290,14 @@ for (p = string; length-- > 0; p++)
     *erroroffset = (int)(p - string) - ab;
     return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
     }
+
+  /* Reject non-characters. The pointer p is currently at the last byte of the
+  character. */
+  if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
+    {
+    *erroroffset = (int)(p - string) - ab;
+    return PCRE_UTF8_ERR22;
+    }
   }
 
 #else /* Not SUPPORT_UTF */
```