summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Hockey <joel.hockey@gmail.com>2020-08-16 17:19:35 -0700
committerNick Wellnhofer <wellnhofer@aevum.de>2021-04-22 11:57:32 +0200
commitbf22713507fe1fc3a2c4b525cf0a88c2dc87a3a2 (patch)
tree7fd75b0f9f012404f13c39080f5bdba889b22147
parent1358d157d0bd83be1dfe356a69213df9fac0b539 (diff)
downloadlibxml2-bf22713507fe1fc3a2c4b525cf0a88c2dc87a3a2.tar.gz
Validate UTF8 in xmlEncodeEntities
Code is currently assuming UTF-8 without validating. Truncated UTF-8 input can cause out-of-bounds array access. Adds further checks to partial fix in 50f06b3e. Fixes #178
-rw-r--r--entities.c16
1 files changed, 15 insertions, 1 deletions
diff --git a/entities.c b/entities.c
index 37b99a56..1a8f86f0 100644
--- a/entities.c
+++ b/entities.c
@@ -704,11 +704,25 @@ xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
} else {
/*
* We assume we have UTF-8 input.
+ * It must match either:
+ * 110xxxxx 10xxxxxx
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * That is:
+ * cur[0] is 11xxxxxx
+ * cur[1] is 10xxxxxx
+ * cur[2] is 10xxxxxx if cur[0] is 111xxxxx
+ * cur[3] is 10xxxxxx if cur[0] is 1111xxxx
+ * cur[0] is not 11111xxx
*/
char buf[11], *ptr;
int val = 0, l = 1;
- if (*cur < 0xC0) {
+ if (((cur[0] & 0xC0) != 0xC0) ||
+ ((cur[1] & 0xC0) != 0x80) ||
+ (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
+ (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
+ (((cur[0] & 0xF8) == 0xF8))) {
xmlEntitiesErr(XML_CHECK_NOT_UTF8,
"xmlEncodeEntities: input not UTF-8");
if (doc != NULL)