diff options
author | Nick Wellnhofer <wellnhofer@aevum.de> | 2022-12-04 23:01:00 +0100 |
---|---|---|
committer | Nick Wellnhofer <wellnhofer@aevum.de> | 2022-12-04 23:34:19 +0100 |
commit | 76c6da420923f2721a2e16adfcef8707a2454a1b (patch) | |
tree | 0994905c66a72034318b3fdcb4e696e14ec3a04f | |
parent | 4b959ee1680e2ffbf49cd260cbef08f94e0ab6b0 (diff) | |
download | libxml2-76c6da420923f2721a2e16adfcef8707a2454a1b.tar.gz |
error: Make sure that error messages are valid UTF-8
This has caused issues with the Python bindings for a long time.
Should fix #64.
-rw-r--r-- | error.c | 29 | ||||
-rw-r--r-- | result/HTML/utf8bug.html.err | 2 | ||||
-rw-r--r-- | result/errors/754947.xml.ent | 6 | ||||
-rw-r--r-- | result/errors/754947.xml.err | 6 | ||||
-rw-r--r-- | result/errors/754947.xml.str | 2 | ||||
-rw-r--r-- | result/errors/759573-2.xml.ent | 8 | ||||
-rw-r--r-- | result/errors/759573-2.xml.err | 8 | ||||
-rw-r--r-- | result/errors/759573-2.xml.str | 6 | ||||
-rw-r--r-- | result/errors/cdata.xml.ent | 2 | ||||
-rw-r--r-- | result/errors/cdata.xml.err | 2 | ||||
-rw-r--r-- | result/errors/cdata.xml.str | 2 | ||||
-rw-r--r-- | result/errors/utf8-1.xml.ent | 9 | ||||
-rw-r--r-- | result/errors/utf8-1.xml.err | 9 | ||||
-rw-r--r-- | result/errors/utf8-1.xml.str | 4 | ||||
-rw-r--r-- | result/errors/utf8-2.xml.ent | 9 | ||||
-rw-r--r-- | result/errors/utf8-2.xml.err | 9 | ||||
-rw-r--r-- | result/errors/utf8-2.xml.str | 4 | ||||
-rw-r--r-- | runtest.c | 35 | ||||
-rw-r--r-- | test/errors/utf8-1.xml | 1 | ||||
-rw-r--r-- | test/errors/utf8-2.xml | 1 |
20 files changed, 112 insertions, 42 deletions
@@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) { } /** - * xmlParserPrintFileContext: + * xmlParserPrintFileContextInternal: * @input: an xmlParserInputPtr input * * Displays current context within the input content for error tracking @@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) { static void xmlParserPrintFileContextInternal(xmlParserInputPtr input , xmlGenericErrorFunc channel, void *data ) { - const xmlChar *cur, *base; + const xmlChar *cur, *base, *start; unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ xmlChar content[81]; /* space for 80 chars + line terminator */ xmlChar *ctnt; @@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input , while ((n++ < (sizeof(content)-1)) && (cur > base) && (*(cur) != '\n') && (*(cur) != '\r')) cur--; - if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + if ((*(cur) == '\n') || (*(cur) == '\r')) { + cur++; + } else { + /* skip over continuation bytes */ + while ((cur < input->cur) && ((*cur & 0xC0) == 0x80)) + cur++; + } /* calculate the error position in terms of the current position */ col = input->cur - cur; /* search forward for end-of-line (to max buff size) */ n = 0; - ctnt = content; + start = cur; /* copy selected text to our buffer */ - while ((*cur != 0) && (*(cur) != '\n') && - (*(cur) != '\r') && (n < sizeof(content)-1)) { - *ctnt++ = *cur++; - n++; + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + int len = input->end - cur; + int c = xmlGetUTF8Char(cur, &len); + + if ((c < 0) || (n + len > sizeof(content)-1)) + break; + cur += len; + n += len; } - *ctnt = 0; + memcpy(content, start, n); + content[n] = 0; /* print out the selected text */ channel(data ,"%s\n", content); /* create blank line with problem pointer */ diff --git a/result/HTML/utf8bug.html.err b/result/HTML/utf8bug.html.err index a6ef047d..b51aa95f 100644 --- a/result/HTML/utf8bug.html.err +++ b/result/HTML/utf8bug.html.err @@ -11,5 +11,5 @@ ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1 ^ ./test/HTML/utf8bug.html:177: HTML parser error : htmlParseEntityRef: expecting ';' -§ÛŒÙ† پاسخ را برای نویسنده مقاله رجانیوز copy&paste +ین پاسخ را برای نویسنده مقاله رجانیوز copy&paste ^ diff --git a/result/errors/754947.xml.ent b/result/errors/754947.xml.ent index f45cb5a2..1ea73d13 100644 --- a/result/errors/754947.xml.ent +++ b/result/errors/754947.xml.ent @@ -1,7 +1,7 @@ ./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xEE 0x5D 0x5D 0x3E -<d><![CDATA[0000000000000î]]> +<d><![CDATA[0000000000000 ^ ./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1 -<d><![CDATA[0000000000000î]]> - ^ +<d><![CDATA[0000000000000 + ^ diff --git a/result/errors/754947.xml.err b/result/errors/754947.xml.err index f45cb5a2..1ea73d13 100644 --- a/result/errors/754947.xml.err +++ b/result/errors/754947.xml.err @@ -1,7 +1,7 @@ ./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xEE 0x5D 0x5D 0x3E -<d><![CDATA[0000000000000î]]> +<d><![CDATA[0000000000000 ^ ./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1 -<d><![CDATA[0000000000000î]]> - ^ +<d><![CDATA[0000000000000 + ^ diff --git a/result/errors/754947.xml.str b/result/errors/754947.xml.str index 4d2f52e3..d248323d 100644 --- a/result/errors/754947.xml.str +++ b/result/errors/754947.xml.str @@ -1,5 +1,5 @@ ./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xEE 0x5D 0x5D 0x3E -<d><![CDATA[0000000000000î]]> +<d><![CDATA[0000000000000 ^ ./test/errors/754947.xml : failed to parse diff --git a/result/errors/759573-2.xml.ent b/result/errors/759573-2.xml.ent index 300c7b3c..51cb2d95 100644 --- a/result/errors/759573-2.xml.ent +++ b/result/errors/759573-2.xml.ent @@ -18,18 +18,18 @@ Entity: line 1: ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ Entity: line 2: <![INCLUDE[ ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ diff --git a/result/errors/759573-2.xml.err b/result/errors/759573-2.xml.err index 300c7b3c..51cb2d95 100644 --- a/result/errors/759573-2.xml.err +++ b/result/errors/759573-2.xml.err @@ -18,18 +18,18 @@ Entity: line 1: ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ Entity: line 2: <![INCLUDE[ ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ diff --git a/result/errors/759573-2.xml.str b/result/errors/759573-2.xml.str index ff0cbdc2..3b7419eb 100644 --- a/result/errors/759573-2.xml.str +++ b/result/errors/759573-2.xml.str @@ -18,16 +18,16 @@ Entity: line 1: ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ Entity: line 2: <![INCLUDE[ ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml : failed to parse diff --git a/result/errors/cdata.xml.ent b/result/errors/cdata.xml.ent index f757963f..f7c7b48f 100644 --- a/result/errors/cdata.xml.ent +++ b/result/errors/cdata.xml.ent @@ -1,4 +1,4 @@ ./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xE1 0x72 0x5D 0x5D -<A><![CDATA[Cár]]></A> +<A><![CDATA[C ^ diff --git a/result/errors/cdata.xml.err b/result/errors/cdata.xml.err index f757963f..f7c7b48f 100644 --- a/result/errors/cdata.xml.err +++ b/result/errors/cdata.xml.err @@ -1,4 +1,4 @@ ./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xE1 0x72 0x5D 0x5D -<A><![CDATA[Cár]]></A> +<A><![CDATA[C ^ diff --git a/result/errors/cdata.xml.str b/result/errors/cdata.xml.str index cf83d2b3..af1321e0 100644 --- a/result/errors/cdata.xml.str +++ b/result/errors/cdata.xml.str @@ -1,5 +1,5 @@ ./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xE1 0x72 0x5D 0x5D -<A><![CDATA[Cár]]></A> +<A><![CDATA[C ^ ./test/errors/cdata.xml : failed to parse diff --git a/result/errors/utf8-1.xml.ent b/result/errors/utf8-1.xml.ent new file mode 100644 index 00000000..35d84980 --- /dev/null +++ b/result/errors/utf8-1.xml.ent @@ -0,0 +1,9 @@ +./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name +..............................................................................<< + ^ +./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name +..............................................................................<< + ^ +./test/errors/utf8-1.xml:2: parser error : Premature end of data in tag d line 1 + +^ diff --git a/result/errors/utf8-1.xml.err b/result/errors/utf8-1.xml.err new file mode 100644 index 00000000..35d84980 --- /dev/null +++ b/result/errors/utf8-1.xml.err @@ -0,0 +1,9 @@ +./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name +..............................................................................<< + ^ +./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name +..............................................................................<< + ^ +./test/errors/utf8-1.xml:2: parser error : Premature end of data in tag d line 1 + +^ diff --git a/result/errors/utf8-1.xml.str b/result/errors/utf8-1.xml.str new file mode 100644 index 00000000..8c6df1d7 --- /dev/null +++ b/result/errors/utf8-1.xml.str @@ -0,0 +1,4 @@ +./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name +..............................................................................<< + ^ +./test/errors/utf8-1.xml : failed to parse diff --git a/result/errors/utf8-2.xml.ent b/result/errors/utf8-2.xml.ent new file mode 100644 index 00000000..1631d664 --- /dev/null +++ b/result/errors/utf8-2.xml.ent @@ -0,0 +1,9 @@ +./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name +<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€ + ^ +./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1 + +^ +./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1 + +^ diff --git a/result/errors/utf8-2.xml.err b/result/errors/utf8-2.xml.err new file mode 100644 index 00000000..1631d664 --- /dev/null +++ b/result/errors/utf8-2.xml.err @@ -0,0 +1,9 @@ +./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name +<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€ + ^ +./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1 + +^ +./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1 + +^ diff --git a/result/errors/utf8-2.xml.str b/result/errors/utf8-2.xml.str new file mode 100644 index 00000000..f8db7f72 --- /dev/null +++ b/result/errors/utf8-2.xml.str @@ -0,0 +1,4 @@ +./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name +<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€ + ^ +./test/errors/utf8-2.xml : failed to parse @@ -292,7 +292,7 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { } /** - * xmlParserPrintFileContext: + * xmlParserPrintFileContextInternal: * @input: an xmlParserInputPtr input * * Displays current context within the input content for error tracking @@ -301,12 +301,14 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { static void xmlParserPrintFileContextInternal(xmlParserInputPtr input , xmlGenericErrorFunc chanl, void *data ) { - const xmlChar *cur, *base; + const xmlChar *cur, *base, *start; unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ xmlChar content[81]; /* space for 80 chars + line terminator */ xmlChar *ctnt; - if (input == NULL) return; + if ((input == NULL) || (input->cur == NULL)) + return; + cur = input->cur; base = input->base; /* skip backwards over any end-of-lines */ @@ -316,21 +318,32 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input , n = 0; /* search backwards for beginning-of-line (to max buff size) */ while ((n++ < (sizeof(content)-1)) && (cur > base) && - (*(cur) != '\n') && (*(cur) != '\r')) + (*(cur) != '\n') && (*(cur) != '\r')) cur--; - if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + if ((*(cur) == '\n') || (*(cur) == '\r')) { + cur++; + } else { + /* skip over continuation bytes */ + while ((cur < input->cur) && ((*cur & 0xC0) == 0x80)) + cur++; + } /* calculate the error position in terms of the current position */ col = input->cur - cur; /* search forward for end-of-line (to max buff size) */ n = 0; - ctnt = content; + start = cur; /* copy selected text to our buffer */ - while ((*cur != 0) && (*(cur) != '\n') && - (*(cur) != '\r') && (n < sizeof(content)-1)) { - *ctnt++ = *cur++; - n++; + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + int len = input->end - cur; + int c = xmlGetUTF8Char(cur, &len); + + if ((c < 0) || (n + len > sizeof(content)-1)) + break; + cur += len; + n += len; } - *ctnt = 0; + memcpy(content, start, n); + content[n] = 0; /* print out the selected text */ chanl(data ,"%s\n", content); /* create blank line with problem pointer */ diff --git a/test/errors/utf8-1.xml b/test/errors/utf8-1.xml new file mode 100644 index 00000000..0481aa47 --- /dev/null +++ b/test/errors/utf8-1.xml @@ -0,0 +1 @@ +<d>Ä..............................................................................<< diff --git a/test/errors/utf8-2.xml b/test/errors/utf8-2.xml new file mode 100644 index 00000000..190c7f81 --- /dev/null +++ b/test/errors/utf8-2.xml @@ -0,0 +1 @@ +<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€€ |