summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nick Wellnhofer <wellnhofer@aevum.de> 2022-12-04 23:01:00 +0100
committer Nick Wellnhofer <wellnhofer@aevum.de> 2022-12-04 23:34:19 +0100
commit 76c6da420923f2721a2e16adfcef8707a2454a1b (patch)
tree 0994905c66a72034318b3fdcb4e696e14ec3a04f
parent 4b959ee1680e2ffbf49cd260cbef08f94e0ab6b0 (diff)
download libxml2-76c6da420923f2721a2e16adfcef8707a2454a1b.tar.gz
error: Make sure that error messages are valid UTF-8
This has caused issues with the Python bindings for a long time. Should fix #64.
-rw-r--r-- error.c 29
-rw-r--r-- result/HTML/utf8bug.html.err 2
-rw-r--r-- result/errors/754947.xml.ent 6
-rw-r--r-- result/errors/754947.xml.err 6
-rw-r--r-- result/errors/754947.xml.str 2
-rw-r--r-- result/errors/759573-2.xml.ent 8
-rw-r--r-- result/errors/759573-2.xml.err 8
-rw-r--r-- result/errors/759573-2.xml.str 6
-rw-r--r-- result/errors/cdata.xml.ent 2
-rw-r--r-- result/errors/cdata.xml.err 2
-rw-r--r-- result/errors/cdata.xml.str 2
-rw-r--r-- result/errors/utf8-1.xml.ent 9
-rw-r--r-- result/errors/utf8-1.xml.err 9
-rw-r--r-- result/errors/utf8-1.xml.str 4
-rw-r--r-- result/errors/utf8-2.xml.ent 9
-rw-r--r-- result/errors/utf8-2.xml.err 9
-rw-r--r-- result/errors/utf8-2.xml.str 4
-rw-r--r-- runtest.c 35
-rw-r--r-- test/errors/utf8-1.xml 1
-rw-r--r-- test/errors/utf8-2.xml 1
20 files changed, 112 insertions, 42 deletions
diff --git a/error.c b/error.c
index c96877f4..dbbde3ad 100644
--- a/error.c
+++ b/error.c
@@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
}
/**
- * xmlParserPrintFileContext:
+ * xmlParserPrintFileContextInternal:
* @input: an xmlParserInputPtr input
*
* Displays current context within the input content for error tracking
@@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
static void
xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
xmlGenericErrorFunc channel, void *data ) {
- const xmlChar *cur, *base;
+ const xmlChar *cur, *base, *start;
unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */
xmlChar content[81]; /* space for 80 chars + line terminator */
xmlChar *ctnt;
@@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
while ((n++ < (sizeof(content)-1)) && (cur > base) &&
(*(cur) != '\n') && (*(cur) != '\r'))
cur--;
- if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
+ if ((*(cur) == '\n') || (*(cur) == '\r')) {
+ cur++;
+ } else {
+ /* skip over continuation bytes */
+ while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
+ cur++;
+ }
/* calculate the error position in terms of the current position */
col = input->cur - cur;
/* search forward for end-of-line (to max buff size) */
n = 0;
- ctnt = content;
+ start = cur;
/* copy selected text to our buffer */
- while ((*cur != 0) && (*(cur) != '\n') &&
- (*(cur) != '\r') && (n < sizeof(content)-1)) {
- *ctnt++ = *cur++;
- n++;
+ while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
+ int len = input->end - cur;
+ int c = xmlGetUTF8Char(cur, &len);
+
+ if ((c < 0) || (n + len > sizeof(content)-1))
+ break;
+ cur += len;
+ n += len;
}
- *ctnt = 0;
+ memcpy(content, start, n);
+ content[n] = 0;
/* print out the selected text */
channel(data ,"%s\n", content);
/* create blank line with problem pointer */
diff --git a/result/HTML/utf8bug.html.err b/result/HTML/utf8bug.html.err
index a6ef047d..b51aa95f 100644
--- a/result/HTML/utf8bug.html.err
+++ b/result/HTML/utf8bug.html.err
@@ -11,5 +11,5 @@
ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1
^
./test/HTML/utf8bug.html:177: HTML parser error : htmlParseEntityRef: expecting ';'
-§ÛŒÙ† پاسخ را برای نویسنده مقاله رجانیوز copy&paste
+ین پاسخ را برای نویسنده مقاله رجانیوز copy&paste
^
diff --git a/result/errors/754947.xml.ent b/result/errors/754947.xml.ent
index f45cb5a2..1ea73d13 100644
--- a/result/errors/754947.xml.ent
+++ b/result/errors/754947.xml.ent
@@ -1,7 +1,7 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xEE 0x5D 0x5D 0x3E
-<d><![CDATA[0000000000000î]]>
+<d><![CDATA[0000000000000
^
./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1
-<d><![CDATA[0000000000000î]]>
- ^
+<d><![CDATA[0000000000000
+ ^
diff --git a/result/errors/754947.xml.err b/result/errors/754947.xml.err
index f45cb5a2..1ea73d13 100644
--- a/result/errors/754947.xml.err
+++ b/result/errors/754947.xml.err
@@ -1,7 +1,7 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xEE 0x5D 0x5D 0x3E
-<d><![CDATA[0000000000000î]]>
+<d><![CDATA[0000000000000
^
./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1
-<d><![CDATA[0000000000000î]]>
- ^
+<d><![CDATA[0000000000000
+ ^
diff --git a/result/errors/754947.xml.str b/result/errors/754947.xml.str
index 4d2f52e3..d248323d 100644
--- a/result/errors/754947.xml.str
+++ b/result/errors/754947.xml.str
@@ -1,5 +1,5 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xEE 0x5D 0x5D 0x3E
-<d><![CDATA[0000000000000î]]>
+<d><![CDATA[0000000000000
^
./test/errors/754947.xml : failed to parse
diff --git a/result/errors/759573-2.xml.ent b/result/errors/759573-2.xml.ent
index 300c7b3c..51cb2d95 100644
--- a/result/errors/759573-2.xml.ent
+++ b/result/errors/759573-2.xml.ent
@@ -18,18 +18,18 @@ Entity: line 1:
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
Entity: line 2:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
diff --git a/result/errors/759573-2.xml.err b/result/errors/759573-2.xml.err
index 300c7b3c..51cb2d95 100644
--- a/result/errors/759573-2.xml.err
+++ b/result/errors/759573-2.xml.err
@@ -18,18 +18,18 @@ Entity: line 1:
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
Entity: line 2:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
diff --git a/result/errors/759573-2.xml.str b/result/errors/759573-2.xml.str
index ff0cbdc2..3b7419eb 100644
--- a/result/errors/759573-2.xml.str
+++ b/result/errors/759573-2.xml.str
@@ -18,16 +18,16 @@ Entity: line 1:
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
Entity: line 2:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
-%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
+%xx;
^
./test/errors/759573-2.xml : failed to parse
diff --git a/result/errors/cdata.xml.ent b/result/errors/cdata.xml.ent
index f757963f..f7c7b48f 100644
--- a/result/errors/cdata.xml.ent
+++ b/result/errors/cdata.xml.ent
@@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE1 0x72 0x5D 0x5D
-<A><![CDATA[Cár]]></A>
+<A><![CDATA[C
^
diff --git a/result/errors/cdata.xml.err b/result/errors/cdata.xml.err
index f757963f..f7c7b48f 100644
--- a/result/errors/cdata.xml.err
+++ b/result/errors/cdata.xml.err
@@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE1 0x72 0x5D 0x5D
-<A><![CDATA[Cár]]></A>
+<A><![CDATA[C
^
diff --git a/result/errors/cdata.xml.str b/result/errors/cdata.xml.str
index cf83d2b3..af1321e0 100644
--- a/result/errors/cdata.xml.str
+++ b/result/errors/cdata.xml.str
@@ -1,5 +1,5 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE1 0x72 0x5D 0x5D
-<A><![CDATA[Cár]]></A>
+<A><![CDATA[C
^
./test/errors/cdata.xml : failed to parse
diff --git a/result/errors/utf8-1.xml.ent b/result/errors/utf8-1.xml.ent
new file mode 100644
index 00000000..35d84980
--- /dev/null
+++ b/result/errors/utf8-1.xml.ent
@@ -0,0 +1,9 @@
+./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
+..............................................................................<<
+ ^
+./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
+..............................................................................<<
+ ^
+./test/errors/utf8-1.xml:2: parser error : Premature end of data in tag d line 1
+
+^
diff --git a/result/errors/utf8-1.xml.err b/result/errors/utf8-1.xml.err
new file mode 100644
index 00000000..35d84980
--- /dev/null
+++ b/result/errors/utf8-1.xml.err
@@ -0,0 +1,9 @@
+./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
+..............................................................................<<
+ ^
+./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
+..............................................................................<<
+ ^
+./test/errors/utf8-1.xml:2: parser error : Premature end of data in tag d line 1
+
+^
diff --git a/result/errors/utf8-1.xml.str b/result/errors/utf8-1.xml.str
new file mode 100644
index 00000000..8c6df1d7
--- /dev/null
+++ b/result/errors/utf8-1.xml.str
@@ -0,0 +1,4 @@
+./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
+..............................................................................<<
+ ^
+./test/errors/utf8-1.xml : failed to parse
diff --git a/result/errors/utf8-2.xml.ent b/result/errors/utf8-2.xml.ent
new file mode 100644
index 00000000..1631d664
--- /dev/null
+++ b/result/errors/utf8-2.xml.ent
@@ -0,0 +1,9 @@
+./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name
+<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€
+ ^
+./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1
+
+^
+./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1
+
+^
diff --git a/result/errors/utf8-2.xml.err b/result/errors/utf8-2.xml.err
new file mode 100644
index 00000000..1631d664
--- /dev/null
+++ b/result/errors/utf8-2.xml.err
@@ -0,0 +1,9 @@
+./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name
+<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€
+ ^
+./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1
+
+^
+./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1
+
+^
diff --git a/result/errors/utf8-2.xml.str b/result/errors/utf8-2.xml.str
new file mode 100644
index 00000000..f8db7f72
--- /dev/null
+++ b/result/errors/utf8-2.xml.str
@@ -0,0 +1,4 @@
+./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name
+<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€
+ ^
+./test/errors/utf8-2.xml : failed to parse
diff --git a/runtest.c b/runtest.c
index c5545fba..7e345c98 100644
--- a/runtest.c
+++ b/runtest.c
@@ -292,7 +292,7 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
}
/**
- * xmlParserPrintFileContext:
+ * xmlParserPrintFileContextInternal:
* @input: an xmlParserInputPtr input
*
* Displays current context within the input content for error tracking
@@ -301,12 +301,14 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
static void
xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
xmlGenericErrorFunc chanl, void *data ) {
- const xmlChar *cur, *base;
+ const xmlChar *cur, *base, *start;
unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */
xmlChar content[81]; /* space for 80 chars + line terminator */
xmlChar *ctnt;
- if (input == NULL) return;
+ if ((input == NULL) || (input->cur == NULL))
+ return;
+
cur = input->cur;
base = input->base;
/* skip backwards over any end-of-lines */
@@ -316,21 +318,32 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
n = 0;
/* search backwards for beginning-of-line (to max buff size) */
while ((n++ < (sizeof(content)-1)) && (cur > base) &&
- (*(cur) != '\n') && (*(cur) != '\r'))
+ (*(cur) != '\n') && (*(cur) != '\r'))
cur--;
- if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
+ if ((*(cur) == '\n') || (*(cur) == '\r')) {
+ cur++;
+ } else {
+ /* skip over continuation bytes */
+ while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
+ cur++;
+ }
/* calculate the error position in terms of the current position */
col = input->cur - cur;
/* search forward for end-of-line (to max buff size) */
n = 0;
- ctnt = content;
+ start = cur;
/* copy selected text to our buffer */
- while ((*cur != 0) && (*(cur) != '\n') &&
- (*(cur) != '\r') && (n < sizeof(content)-1)) {
- *ctnt++ = *cur++;
- n++;
+ while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
+ int len = input->end - cur;
+ int c = xmlGetUTF8Char(cur, &len);
+
+ if ((c < 0) || (n + len > sizeof(content)-1))
+ break;
+ cur += len;
+ n += len;
}
- *ctnt = 0;
+ memcpy(content, start, n);
+ content[n] = 0;
/* print out the selected text */
chanl(data ,"%s\n", content);
/* create blank line with problem pointer */
diff --git a/test/errors/utf8-1.xml b/test/errors/utf8-1.xml
new file mode 100644
index 00000000..0481aa47
--- /dev/null
+++ b/test/errors/utf8-1.xml
@@ -0,0 +1 @@
+<d>Ä..............................................................................<<
diff --git a/test/errors/utf8-2.xml b/test/errors/utf8-2.xml
new file mode 100644
index 00000000..190c7f81
--- /dev/null
+++ b/test/errors/utf8-2.xml
@@ -0,0 +1 @@
+<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€€