summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bastien Nocera <hadess@hadess.net> 2021-03-04 15:25:04 +0100
committer: Bastien Nocera <hadess@hadess.net> 2021-03-04 17:35:38 +0100
commit: a19de02ba7586b576c42d8f0758eeaef94652d2b (patch)
tree: eefb0cdba6d39e1c05001de9955096d159669f66
parent: d7e724a409b0df09bf7d677169af898aa98e6d9f (diff)
download: totem-pl-parser-a19de02ba7586b576c42d8f0758eeaef94652d2b.tar.gz
plparser: Validate UTF-8 before returning it
We shouldn't blindly return data as UTF-8 simply because the XML header says that it is UTF-8.
-rw-r--r-- plparse/totem-pl-parser.c | 10
1 file changed, 8 insertions, 2 deletions
diff --git a/plparse/totem-pl-parser.c b/plparse/totem-pl-parser.c
index a1bec2b..9869701 100644
--- a/plparse/totem-pl-parser.c
+++ b/plparse/totem-pl-parser.c
@@ -1874,8 +1874,14 @@ totem_pl_parser_parse_xml_relaxed (char *contents,
break;
}
- if (encoding == NULL || g_ascii_strcasecmp (encoding, "UTF-8") == 0)
- return doc;
+ if (encoding == NULL || g_ascii_strcasecmp (encoding, "UTF-8") == 0) {
+ if (g_utf8_validate (contents, -1, NULL))
+ return doc;
+ g_debug ("Document %s pretended to be in UTF-8 but didn't validate",
+ encoding ? "explicitly" : "implicitly");
+ /* FIXME detect encoding using uchardet */
+ return NULL;
+ }
xml_parser_free_tree (doc);