From 8d0aaf4b95eaaaaadb4d793b8a3cb05b124ad1a2 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 19 Dec 2023 20:47:36 +0100 Subject: [PATCH] parser: Remove xmlErrEncoding Use xmlFatalErr or xmlCtxtErrIO. --- HTMLparser.c | 15 +------- error.c | 16 ++++++++ include/private/parser.h | 4 -- parser.c | 17 ++------- parserInternals.c | 71 +++++------------------------------- result/errors/754947.xml.ent | 2 +- result/errors/754947.xml.err | 2 +- result/errors/754947.xml.str | 2 +- result/errors/cdata.xml.ent | 2 +- result/errors/cdata.xml.err | 2 +- result/errors/cdata.xml.str | 2 +- runtest.c | 16 ++++++++ testchar.c | 28 +++++++------- 13 files changed, 66 insertions(+), 113 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 2376113f..171df6f0 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -490,20 +490,7 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { } encoding_error: - { - char buffer[150]; - - if (ctxt->input->end - ctxt->input->cur >= 4) { - snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); - } else { - snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]); - } - htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, - "Input is not proper UTF-8, indicate encoding !\n", - BAD_CAST buffer, NULL); - } + xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL); if ((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); diff --git a/error.c b/error.c index 42b1323d..2d7fbb91 100644 --- a/error.c +++ b/error.c @@ -543,7 +543,23 @@ xmlReportError(xmlParserCtxtPtr ctxt, const xmlError *err) } if (ctxt != NULL) { + if ((input != NULL) && + ((input->buf == NULL) || (input->buf->encoder == NULL)) && + (code == XML_ERR_INVALID_ENCODING) && + (input->cur < input->end)) { + int i; + + channel(data, "Bytes:"); + for (i = 0; i < 4; i++) { + if (input->cur + i >= input->end) + break; + channel(data, " 0x%02X", 
input->cur[i]); + } + channel(data, "\n"); + } + xmlParserPrintFileContextInternal(input, channel, data); + if (cur != NULL) { if (cur->filename) channel(data, "%s:%d: \n", cur->filename, cur->line); diff --git a/include/private/parser.h b/include/private/parser.h index 487da2ae..f7be343b 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -46,10 +46,6 @@ XML_HIDDEN void LIBXML_ATTR_FORMAT(3,0) xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2); XML_HIDDEN void -__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, - const char *msg, const xmlChar *str1, - const xmlChar *str2) LIBXML_ATTR_FORMAT(3,0); -XML_HIDDEN void xmlHaltParser(xmlParserCtxtPtr ctxt); XML_HIDDEN int xmlParserGrow(xmlParserCtxtPtr ctxt); diff --git a/parser.c b/parser.c index 9fc29edc..c18bd438 100644 --- a/parser.c +++ b/parser.c @@ -11877,19 +11877,10 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { done: return(ret); encoding_error: - if (ctxt->input->end - ctxt->input->cur < 4) { - __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, - "Input is not proper UTF-8, indicate encoding !\n", - NULL, NULL); - } else { - char buffer[150]; - - snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); - __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, - "Input is not proper UTF-8, indicate encoding !\n%s", - BAD_CAST buffer, NULL); + /* Only report the first error */ + if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) { + xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL); + ctxt->input->flags |= XML_INPUT_ENCODING_ERROR; } return(0); } diff --git a/parserInternals.c b/parserInternals.c index 065a17eb..74b3f213 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -245,24 +245,6 @@ xmlErrParser(xmlParserCtxtPtr ctxt, xmlNodePtr node, va_end(ap); } -/** - * __xmlErrEncoding: - * @ctxt: an XML parser 
context - * @xmlerr: the error number - * @msg: the error message - * @str1: an string info - * @str2: an string info - * - * Handle an encoding error - */ -void -__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, - const char *msg, const xmlChar * str1, const xmlChar * str2) -{ - xmlErrParser(ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, - str1, str2, NULL, 0, msg, str1, str2); -} - /** * xmlErrInternal: * @ctxt: an XML parser context @@ -659,21 +641,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) encoding_error: /* Only report the first error */ if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) { - if ((ctxt == NULL) || (ctxt->input == NULL) || - (ctxt->input->end - ctxt->input->cur < 4)) { - __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, - "Input is not proper UTF-8, indicate encoding !\n", - NULL, NULL); - } else { - char buffer[150]; - - snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); - __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, - "Input is not proper UTF-8, indicate encoding !\n%s", - BAD_CAST buffer, NULL); - } + xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL); ctxt->input->flags |= XML_INPUT_ENCODING_ERROR; } ctxt->input->cur++; @@ -809,20 +777,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { encoding_error: /* Only report the first error */ if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) { - if (ctxt->input->end - ctxt->input->cur < 4) { - __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, - "Input is not proper UTF-8, indicate encoding !\n", - NULL, NULL); - } else { - char buffer[150]; - - snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); - __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, - "Input is not proper UTF-8, indicate encoding !\n%s", - BAD_CAST buffer, NULL); - } + xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, 
NULL); ctxt->input->flags |= XML_INPUT_ENCODING_ERROR; } *len = 1; @@ -1050,14 +1005,9 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) } if (res != 0) { - if (res == XML_ERR_UNSUPPORTED_ENCODING) { - const char *name = xmlGetCharEncodingName(enc); + const char *name = xmlGetCharEncodingName(enc); - __xmlErrEncoding(ctxt, res, "encoding not supported: %s\n", - BAD_CAST (name ? name : ""), NULL); - } else { - xmlFatalErr(ctxt, res, NULL); - } + xmlFatalErr(ctxt, res, (name ? name : "")); return(-1); } @@ -1090,13 +1040,12 @@ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) { xmlCharEncodingHandlerPtr handler; int res; + if (encoding == NULL) + return(-1); + res = xmlOpenCharEncodingHandler(encoding, &handler); if (res != 0) { - if (res == XML_ERR_UNSUPPORTED_ENCODING) - __xmlErrEncoding(ctxt, res, "Unsupported encoding: %s\n", - (const xmlChar *) encoding, NULL); - else - xmlFatalErr(ctxt, res, NULL); + xmlFatalErr(ctxt, res, encoding); return(-1); } @@ -1188,9 +1137,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, if (nbchars == XML_ENC_ERR_MEMORY) { xmlErrMemory(ctxt); } else if (nbchars < 0) { - xmlErrInternal(ctxt, - "switching encoding: encoder error\n", - NULL); + xmlCtxtErrIO(ctxt, in->error, NULL); xmlHaltParser(ctxt); return (-1); } diff --git a/result/errors/754947.xml.ent b/result/errors/754947.xml.ent index 372248b5..ee29e06d 100644 --- a/result/errors/754947.xml.ent +++ b/result/errors/754947.xml.ent @@ -1,4 +1,4 @@ -./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding ! 
+./test/errors/754947.xml:1: I/O error : Invalid bytes in character encoding Bytes: 0xEE 0x5D 0x5D 0x3E buf == NULL) || (input->buf->encoder == NULL)) && + (code == XML_ERR_INVALID_ENCODING) && + (input->cur < input->end)) { + int i; + + channel(data, "Bytes:"); + for (i = 0; i < 4; i++) { + if (input->cur + i >= input->end) + break; + channel(data, " 0x%02X", input->cur[i]); + } + channel(data, "\n"); + } + xmlParserPrintFileContextInternal(input, channel, data); + if (cur != NULL) { if (cur->filename) channel(data, "%s:%d: \n", cur->filename, cur->line); diff --git a/testchar.c b/testchar.c index f4b2f4f5..13887cd8 100644 --- a/testchar.c +++ b/testchar.c @@ -310,9 +310,9 @@ static int testCharRangeByte1(xmlParserCtxtPtr ctxt) { c = testCurrentChar(ctxt, &len); if (c < 0) continue; - if ((i == 0) || (i >= 0x80)) { + if (i >= 0x80) { /* we must see an error there */ - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Byte 0x%02X\n", i); return(1); @@ -349,7 +349,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) { /* if first bit of first char is set, then second bit must too */ if ((i & 0x80) && ((i & 0x40) == 0)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n", i, j); @@ -362,7 +362,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) { * bits must be 10 */ else if ((i & 0x80) && ((j & 0xC0) != 0x80)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n", i, j, c); @@ -375,7 +375,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) { * than 0x80, i.e.
one of bits 5 to 1 of i must be set */ else if ((i & 0x80) && ((i & 0x1E) == 0)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n", i, j, c); @@ -388,7 +388,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) { * at least 3 bytes, but we give only 2 ! */ else if ((i & 0xE0) == 0xE0) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n", i, j); @@ -446,7 +446,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) { * at least 4 bytes, but we give only 3 ! */ if ((i & 0xF0) == 0xF0) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n", i, j, K, data[3]); @@ -458,7 +458,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) { * The second and the third bytes must start with 10 */ else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n", i, j, K); @@ -472,7 +472,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) { * the 6th byte of data[1] must be set */ else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n", i, j, K); @@ -484,7 +484,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) { * There are values that are not allowed in UTF-8 */ else if ((value > 0xD7FF) && (value <0xE000)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 
0x%02X\n", value, i, j, K); @@ -548,7 +548,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) { * at least 5 bytes, but we give only 4 ! */ if ((i & 0xF8) == 0xF8) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n", i, j, K, data[3]); @@ -561,7 +561,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) { */ else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) || ((L & 0xC0) != 0x80)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n", i, j, K, L); @@ -575,7 +575,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) { * the 6 or 5th byte of j must be set */ else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n", i, j, K, L); @@ -588,7 +588,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) { */ else if (((value > 0xD7FF) && (value < 0xE000)) || (value > 0x10FFFF)) { - if (lastError != XML_ERR_INVALID_CHAR) { + if (lastError != XML_ERR_INVALID_ENCODING) { fprintf(stderr, "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n", value, i, j, K, L);