diff --git a/error.c b/error.c index c96877f4..dbbde3ad 100644 --- a/error.c +++ b/error.c @@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) { } /** - * xmlParserPrintFileContext: + * xmlParserPrintFileContextInternal: * @input: an xmlParserInputPtr input * * Displays current context within the input content for error tracking @@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) { static void xmlParserPrintFileContextInternal(xmlParserInputPtr input , xmlGenericErrorFunc channel, void *data ) { - const xmlChar *cur, *base; + const xmlChar *cur, *base, *start; unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ xmlChar content[81]; /* space for 80 chars + line terminator */ xmlChar *ctnt; @@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input , while ((n++ < (sizeof(content)-1)) && (cur > base) && (*(cur) != '\n') && (*(cur) != '\r')) cur--; - if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + if ((*(cur) == '\n') || (*(cur) == '\r')) { + cur++; + } else { + /* skip over continuation bytes */ + while ((cur < input->cur) && ((*cur & 0xC0) == 0x80)) + cur++; + } /* calculate the error position in terms of the current position */ col = input->cur - cur; /* search forward for end-of-line (to max buff size) */ n = 0; - ctnt = content; + start = cur; /* copy selected text to our buffer */ - while ((*cur != 0) && (*(cur) != '\n') && - (*(cur) != '\r') && (n < sizeof(content)-1)) { - *ctnt++ = *cur++; - n++; + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + int len = input->end - cur; + int c = xmlGetUTF8Char(cur, &len); + + if ((c < 0) || (n + len > sizeof(content)-1)) + break; + cur += len; + n += len; } - *ctnt = 0; + memcpy(content, start, n); + content[n] = 0; /* print out the selected text */ channel(data ,"%s\n", content); /* create blank line with problem pointer */ diff --git a/result/HTML/utf8bug.html.err b/result/HTML/utf8bug.html.err index a6ef047d..b51aa95f 100644 --- a/result/HTML/utf8bug.html.err +++ b/result/HTML/utf8bug.html.err @@ -11,5 +11,5 @@ ز همکاران است. روی آن کلیک کند. + - ^ + + - ^ + + +%xx; ^ Entity: line 2: +%xx; ^ ./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ diff --git a/result/errors/759573-2.xml.err b/result/errors/759573-2.xml.err index 300c7b3c..51cb2d95 100644 --- a/result/errors/759573-2.xml.err +++ b/result/errors/759573-2.xml.err @@ -18,18 +18,18 @@ Entity: line 1: ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ Entity: line 2: +%xx; ^ ./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml:6: parser error : Start tag expected, '<' not found -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ diff --git a/result/errors/759573-2.xml.str b/result/errors/759573-2.xml.str index ff0cbdc2..3b7419eb 100644 --- a/result/errors/759573-2.xml.str +++ b/result/errors/759573-2.xml.str @@ -18,16 +18,16 @@ Entity: line 1: ^ ./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ Entity: line 2: +%xx; ^ ./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated -%xx;ÿggKENSMYNT#MENTDŴzz;'> +%xx; ^ ./test/errors/759573-2.xml : failed to parse diff --git a/result/errors/cdata.xml.ent b/result/errors/cdata.xml.ent index f757963f..f7c7b48f 100644 --- a/result/errors/cdata.xml.ent +++ b/result/errors/cdata.xml.ent @@ -1,4 +1,4 @@ ./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding ! Bytes: 0xE1 0x72 0x5D 0x5D - + + +.<<€€€€€€€€€€€€€€€€€€€€€€€€ + ^ +./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1 + +^ +./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1 + +^ diff --git a/result/errors/utf8-2.xml.err b/result/errors/utf8-2.xml.err new file mode 100644 index 00000000..1631d664 --- /dev/null +++ b/result/errors/utf8-2.xml.err @@ -0,0 +1,9 @@ +./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name +.<<€€€€€€€€€€€€€€€€€€€€€€€€ + ^ +./test/errors/utf8-2.xml:2: parser error : Couldn't find end of Start Tag €€€€€€€€€€€€€€€€€€€€€€€€€ line 1 + +^ +./test/errors/utf8-2.xml:2: parser error : Premature end of data in tag d line 1 + +^ diff --git a/result/errors/utf8-2.xml.str b/result/errors/utf8-2.xml.str new file mode 100644 index 00000000..f8db7f72 --- /dev/null +++ b/result/errors/utf8-2.xml.str @@ -0,0 +1,4 @@ +./test/errors/utf8-2.xml:1: parser error : StartTag: invalid element name +.<<€€€€€€€€€€€€€€€€€€€€€€€€ + ^ +./test/errors/utf8-2.xml : failed to parse diff --git a/runtest.c b/runtest.c index c5545fba..7e345c98 100644 --- a/runtest.c +++ b/runtest.c @@ -292,7 +292,7 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { } /** - * xmlParserPrintFileContext: + * xmlParserPrintFileContextInternal: * @input: an xmlParserInputPtr input * * Displays current context within the input content for error tracking @@ -301,12 +301,14 @@ channel(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { static void xmlParserPrintFileContextInternal(xmlParserInputPtr input , xmlGenericErrorFunc chanl, void *data ) { - const xmlChar *cur, *base; + const xmlChar *cur, *base, *start; unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ xmlChar content[81]; /* space for 80 chars + line terminator */ xmlChar *ctnt; - if (input == NULL) return; + if ((input == NULL) || (input->cur == NULL)) + return; + cur = input->cur; base = input->base; /* skip backwards over any end-of-lines */ @@ -316,21 +318,32 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input , n = 0; /* search backwards for beginning-of-line (to max buff size) */ while ((n++ < (sizeof(content)-1)) && (cur > base) && - (*(cur) != '\n') && (*(cur) != '\r')) + (*(cur) != '\n') && (*(cur) != '\r')) cur--; - if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + if ((*(cur) == '\n') || (*(cur) == '\r')) { + cur++; + } else { + /* skip over continuation bytes */ + while ((cur < input->cur) && ((*cur & 0xC0) == 0x80)) + cur++; + } /* calculate the error position in terms of the current position */ col = input->cur - cur; /* search forward for end-of-line (to max buff size) */ n = 0; - ctnt = content; + start = cur; /* copy selected text to our buffer */ - while ((*cur != 0) && (*(cur) != '\n') && - (*(cur) != '\r') && (n < sizeof(content)-1)) { - *ctnt++ = *cur++; - n++; + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + int len = input->end - cur; + int c = xmlGetUTF8Char(cur, &len); + + if ((c < 0) || (n + len > sizeof(content)-1)) + break; + cur += len; + n += len; } - *ctnt = 0; + memcpy(content, start, n); + content[n] = 0; /* print out the selected text */ chanl(data ,"%s\n", content); /* create blank line with problem pointer */ diff --git a/test/errors/utf8-1.xml b/test/errors/utf8-1.xml new file mode 100644 index 00000000..0481aa47 --- /dev/null +++ b/test/errors/utf8-1.xml @@ -0,0 +1 @@ +Ä..............................................................................<< diff --git a/test/errors/utf8-2.xml b/test/errors/utf8-2.xml new file mode 100644 index 00000000..190c7f81 --- /dev/null +++ b/test/errors/utf8-2.xml @@ -0,0 +1 @@ +.<<€€€€€€€€€€€€€€€€€€€€€€€€€