diff options
author | crvi <crvisqr@gmail.com> | 2021-03-18 19:03:47 +0530 |
---|---|---|
committer | Bastien Nocera <hadess@hadess.net> | 2021-06-24 16:18:47 +0200 |
commit | e84cf43c7080774f5148a82e33118ad3f902a003 (patch) | |
tree | a5d82a4314a71c7d590d519a5dbae28bf39e1202 /plparse | |
parent | e85f88f82cbec9c6e503ebdcc6cb8549883a3cf9 (diff) | |
download | totem-pl-parser-e84cf43c7080774f5148a82e33118ad3f902a003.tar.gz |
plparser: Print illegal char byte sequence info during utf-8 conversion error
This is useful for debugging invalid XML documents. Provides the
following debug information.
Invalid byte sequence in conversion input: byte offset 22493, byte:
'\xe2', byte context: 'ience from Valentine\xe2'
Diffstat (limited to 'plparse')
-rw-r--r-- | plparse/totem-pl-parser.c | 22 |
1 files changed, 18 insertions, 4 deletions
diff --git a/plparse/totem-pl-parser.c b/plparse/totem-pl-parser.c index 534d268..4656f6e 100644 --- a/plparse/totem-pl-parser.c +++ b/plparse/totem-pl-parser.c @@ -159,6 +159,7 @@ #define READ_CHUNK_SIZE 8192 #define RECURSE_LEVEL_MAX 4 +#define ILLEGAL_CONTEXT_LENGTH 20 #define D(x) if (debug) x @@ -1885,7 +1886,7 @@ totem_pl_parser_parse_xml_relaxed (char *contents, g_autoptr(GError) error = NULL; g_autofree char *encoding = NULL; g_autofree char *new_contents = NULL; - gsize new_size; + gsize new_size, bytes_read; xml_parser_t *xml_parser; totem_pl_parser_cleanup_xml (contents); @@ -1921,10 +1922,23 @@ totem_pl_parser_parse_xml_relaxed (char *contents, xml_parser_free_tree (doc); - new_contents = g_convert (contents, size, "UTF-8", encoding, NULL, &new_size, &error); + new_contents = g_convert (contents, size, "UTF-8", encoding, &bytes_read, &new_size, &error); if (new_contents == NULL) { - g_warning ("Failed to convert XML data from '%s' to '%s': %s", - encoding, "UTF-8", error->message); + g_autofree char *message = NULL; + message = g_strdup_printf ("Failed to convert XML data from '%s' to '%s': %s", + encoding, "UTF-8", error->message); + + if (error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE) { + int context_length = MIN (bytes_read, ILLEGAL_CONTEXT_LENGTH); + + g_warning ("%s: byte offset %" G_GSIZE_FORMAT ", byte: '%.1s', byte context: '%.*s'", + message, bytes_read, contents + bytes_read, + context_length + 1, + contents + bytes_read - context_length); + } else { + g_warning ("%s", message); + } + return NULL; } |