summary | refs | log | tree | commit | diff
path: root/pcre_valid_utf8.c
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-02-27 16:27:01 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-02-27 16:27:01 +0000
commit fddeb9c5b24e4d2799afbf6367980c5d982df4d2 (patch)
tree 37ac15a4f72dbb78f889d770cfd1c26137889785 /pcre_valid_utf8.c
parent dba7b5b11ce2b26cc1747f58910da27dd15a72b3 (diff)
download pcre-fddeb9c5b24e4d2799afbf6367980c5d982df4d2.tar.gz
Correct Unicode string checking in the light of corrigendum #9.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1261 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_valid_utf8.c')
-rw-r--r-- pcre_valid_utf8.c 15
1 files changed, 2 insertions, 13 deletions
diff --git a/pcre_valid_utf8.c b/pcre_valid_utf8.c
index a415927..3b0f646 100644
--- a/pcre_valid_utf8.c
+++ b/pcre_valid_utf8.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -92,7 +92,7 @@ PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
-PCRE_UTF8_ERR22 Non-character
+PCRE_UTF8_ERR22 Unused (was non-character)
Arguments:
string points to the string
@@ -118,7 +118,6 @@ if (length < 0)
for (p = string; length-- > 0; p++)
{
register pcre_uchar ab, c, d;
- pcre_uint32 v = 0;
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -187,7 +186,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
- v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -215,7 +213,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
- v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -290,14 +287,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
-
- /* Reject non-characters. The pointer p is currently at the last byte of the
- character. */
- if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
- {
- *erroroffset = (int)(p - string) - ab;
- return PCRE_UTF8_ERR22;
- }
}
#else /* Not SUPPORT_UTF */