error: Make sure that error messages are valid UTF-8

Error messages containing invalid UTF-8 have caused issues with the Python bindings for a long time.

Should fix #64.
This commit is contained in:
Nick Wellnhofer 2022-12-04 23:01:00 +01:00
parent 4b959ee168
commit 76c6da4209
20 changed files with 112 additions and 42 deletions

29
error.c
View File

@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
}
/**
* xmlParserPrintFileContext:
* xmlParserPrintFileContextInternal:
* @input: an xmlParserInputPtr input
*
* Displays current context within the input content for error tracking
@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
static void
xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
xmlGenericErrorFunc channel, void *data ) {
const xmlChar *cur, *base;
const xmlChar *cur, *base, *start;
unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */
xmlChar content[81]; /* space for 80 chars + line terminator */
xmlChar *ctnt;
@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
while ((n++ < (sizeof(content)-1)) && (cur > base) &&
(*(cur) != '\n') && (*(cur) != '\r'))
cur--;
if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
if ((*(cur) == '\n') || (*(cur) == '\r')) {
cur++;
} else {
/* skip over continuation bytes */
while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
cur++;
}
/* calculate the error position in terms of the current position */
col = input->cur - cur;
/* search forward for end-of-line (to max buff size) */
n = 0;
ctnt = content;
start = cur;
/* copy selected text to our buffer */
while ((*cur != 0) && (*(cur) != '\n') &&
(*(cur) != '\r') && (n < sizeof(content)-1)) {
*ctnt++ = *cur++;
n++;
while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
int len = input->end - cur;
int c = xmlGetUTF8Char(cur, &len);
if ((c < 0) || (n + len > sizeof(content)-1))
break;
cur += len;
n += len;
}
*ctnt = 0;
memcpy(content, start, n);
content[n] = 0;
/* print out the selected text */
channel(data ,"%s\n", content);
/* create blank line with problem pointer */

View File

@ -11,5 +11,5 @@
ز همکاران است. روی آن کلیک کند.</FONT></FONT></STRONG><S1
^
./test/HTML/utf8bug.html:177: HTML parser error : htmlParseEntityRef: expecting ';'
§ÛŒÙ† پاسخ را برای نویسنده مقاله رجانیوز copy&paste
ین پاسخ را برای نویسنده مقاله رجانیوز copy&paste
^

View File

@ -1,7 +1,7 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xEE 0x5D 0x5D 0x3E
<d><![CDATA[0000000000000î]]>
<d><![CDATA[0000000000000
^
./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1
<d><![CDATA[0000000000000î]]>
^
<d><![CDATA[0000000000000
^

View File

@ -1,7 +1,7 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xEE 0x5D 0x5D 0x3E
<d><![CDATA[0000000000000î]]>
<d><![CDATA[0000000000000
^
./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1
<d><![CDATA[0000000000000î]]>
^
<d><![CDATA[0000000000000
^

View File

@ -1,5 +1,5 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xEE 0x5D 0x5D 0x3E
<d><![CDATA[0000000000000î]]>
<d><![CDATA[0000000000000
^
./test/errors/754947.xml : failed to parse

View File

@ -18,18 +18,18 @@ Entity: line 1:
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
Entity: line 2:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^

View File

@ -18,18 +18,18 @@ Entity: line 1:
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
Entity: line 2:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^

View File

@ -18,16 +18,16 @@ Entity: line 1:
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
Entity: line 2:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
%xx;ÿggKENSMYNT&#35;MENTD&#372zz;'>
%xx;
^
./test/errors/759573-2.xml : failed to parse

View File

@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE1 0x72 0x5D 0x5D
<A><![CDATA[Cár]]></A>
<A><![CDATA[C
^

View File

@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE1 0x72 0x5D 0x5D
<A><![CDATA[Cár]]></A>
<A><![CDATA[C
^

View File

@ -1,5 +1,5 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE1 0x72 0x5D 0x5D
<A><![CDATA[Cár]]></A>
<A><![CDATA[C
^
./test/errors/cdata.xml : failed to parse

View File

@ -0,0 +1,9 @@
./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
..............................................................................<<
^
./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
..............................................................................<<
^
./test/errors/utf8-1.xml:2: parser error : Premature end of data in tag d line 1
^

View File

@ -0,0 +1,9 @@
./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
..............................................................................<<
^
./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
..............................................................................<<
^
./test/errors/utf8-1.xml:2: parser error : Premature end of data in tag d line 1
^

View File

@ -0,0 +1,4 @@
./test/errors/utf8-1.xml:1: parser error : StartTag: invalid element name
..............................................................................<<
^
./test/errors/utf8-1.xml : failed to parse

View File

@ -0,0 +1,9 @@
./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name
<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€
^
./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1
^
./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1
^

View File

@ -0,0 +1,9 @@
./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name
<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€
^
./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1
^
./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1
^

View File

@ -0,0 +1,4 @@
./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name
<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€
^
./test/errors/utf8-2.xml : failed to parse

View File

@ -292,7 +292,7 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
}
/**
* xmlParserPrintFileContext:
* xmlParserPrintFileContextInternal:
* @input: an xmlParserInputPtr input
*
* Displays current context within the input content for error tracking
@ -301,12 +301,14 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) {
static void
xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
xmlGenericErrorFunc chanl, void *data ) {
const xmlChar *cur, *base;
const xmlChar *cur, *base, *start;
unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */
xmlChar content[81]; /* space for 80 chars + line terminator */
xmlChar *ctnt;
if (input == NULL) return;
if ((input == NULL) || (input->cur == NULL))
return;
cur = input->cur;
base = input->base;
/* skip backwards over any end-of-lines */
@ -316,21 +318,32 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
n = 0;
/* search backwards for beginning-of-line (to max buff size) */
while ((n++ < (sizeof(content)-1)) && (cur > base) &&
(*(cur) != '\n') && (*(cur) != '\r'))
(*(cur) != '\n') && (*(cur) != '\r'))
cur--;
if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
if ((*(cur) == '\n') || (*(cur) == '\r')) {
cur++;
} else {
/* skip over continuation bytes */
while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
cur++;
}
/* calculate the error position in terms of the current position */
col = input->cur - cur;
/* search forward for end-of-line (to max buff size) */
n = 0;
ctnt = content;
start = cur;
/* copy selected text to our buffer */
while ((*cur != 0) && (*(cur) != '\n') &&
(*(cur) != '\r') && (n < sizeof(content)-1)) {
*ctnt++ = *cur++;
n++;
while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
int len = input->end - cur;
int c = xmlGetUTF8Char(cur, &len);
if ((c < 0) || (n + len > sizeof(content)-1))
break;
cur += len;
n += len;
}
*ctnt = 0;
memcpy(content, start, n);
content[n] = 0;
/* print out the selected text */
chanl(data ,"%s\n", content);
/* create blank line with problem pointer */

1
test/errors/utf8-1.xml Normal file
View File

@ -0,0 +1 @@
<d>Ä..............................................................................<<

1
test/errors/utf8-2.xml Normal file
View File

@ -0,0 +1 @@
<d>.<<€€€€€€€€€€€€€€€€€€€€€€€€€