diff options
author | Bastien Nocera <hadess@hadess.net> | 2021-03-04 15:25:04 +0100 |
---|---|---|
committer | Bastien Nocera <hadess@hadess.net> | 2021-03-04 17:35:38 +0100 |
commit | a19de02ba7586b576c42d8f0758eeaef94652d2b (patch) | |
tree | eefb0cdba6d39e1c05001de9955096d159669f66 | |
parent | d7e724a409b0df09bf7d677169af898aa98e6d9f (diff) | |
download | totem-pl-parser-a19de02ba7586b576c42d8f0758eeaef94652d2b.tar.gz |
plparser: Validate UTF-8 before returning it
We shouldn't blindly return data as UTF-8 simply because the XML header
says that it is UTF-8.
-rw-r--r-- | plparse/totem-pl-parser.c | 10 |
1 file changed, 8 insertions, 2 deletions
diff --git a/plparse/totem-pl-parser.c b/plparse/totem-pl-parser.c
index a1bec2b..9869701 100644
--- a/plparse/totem-pl-parser.c
+++ b/plparse/totem-pl-parser.c
@@ -1874,8 +1874,14 @@ totem_pl_parser_parse_xml_relaxed (char *contents,
 		break;
 	}
 
-	if (encoding == NULL || g_ascii_strcasecmp (encoding, "UTF-8") == 0)
-		return doc;
+	if (encoding == NULL || g_ascii_strcasecmp (encoding, "UTF-8") == 0) {
+		if (g_utf8_validate (contents, -1, NULL))
+			return doc;
+		g_debug ("Document %s pretended to be in UTF-8 but didn't validate",
+			 encoding ? "explicitly" : "implicitly");
+		/* FIXME detect encoding using uchardet */
+		return NULL;
+	}
 
 	xml_parser_free_tree (doc);