From 855818bd2b816909b82963ae6549db662c4b8080 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 8 Aug 2023 15:21:37 +0200 Subject: [PATCH] parser: Check for truncated multi-byte sequences When decoding input data, check whether the "raw" buffer is empty after parsing the document. Otherwise, the input ends with a truncated multi-byte sequence which shouldn't be silently ignored. --- parser.c | 14 ++++++++++++-- result/errors/truncated-utf16.xml.ent | 3 +++ result/errors/truncated-utf16.xml.err | 3 +++ result/errors/truncated-utf16.xml.str | 4 ++++ test/errors/truncated-utf16.xml | Bin 0 -> 11 bytes 5 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 result/errors/truncated-utf16.xml.ent create mode 100644 result/errors/truncated-utf16.xml.err create mode 100644 result/errors/truncated-utf16.xml.str create mode 100644 test/errors/truncated-utf16.xml diff --git a/parser.c b/parser.c index 2ebc3799..53d77775 100644 --- a/parser.c +++ b/parser.c @@ -10455,7 +10455,12 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { if (RAW != 0) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); - } + } else if ((ctxt->input->buf != NULL) && + (ctxt->input->buf->encoder != NULL) && + (!xmlBufIsEmpty(ctxt->input->buf->raw))) { + xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, + "Truncated multi-byte sequence at EOF\n"); + } ctxt->instate = XML_PARSER_EOF; } @@ -11686,7 +11691,12 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, if ((ctxt->instate == XML_PARSER_EPILOG) && (ctxt->input->cur < ctxt->input->end)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); - } + } else if ((ctxt->input->buf != NULL) && + (ctxt->input->buf->encoder != NULL) && + (!xmlBufIsEmpty(ctxt->input->buf->raw))) { + xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, + "Truncated multi-byte sequence at EOF\n"); + } if (ctxt->instate != XML_PARSER_EOF) { if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); diff --git a/result/errors/truncated-utf16.xml.ent b/result/errors/truncated-utf16.xml.ent new file mode 100644 index 00000000..f5be53cb --- /dev/null +++ b/result/errors/truncated-utf16.xml.ent @@ -0,0 +1,3 @@ +./test/errors/truncated-utf16.xml:1: parser error : Truncated multi-byte sequence at EOF + + ^ diff --git a/result/errors/truncated-utf16.xml.err b/result/errors/truncated-utf16.xml.err new file mode 100644 index 00000000..f5be53cb --- /dev/null +++ b/result/errors/truncated-utf16.xml.err @@ -0,0 +1,3 @@ +./test/errors/truncated-utf16.xml:1: parser error : Truncated multi-byte sequence at EOF + + ^ diff --git a/result/errors/truncated-utf16.xml.str b/result/errors/truncated-utf16.xml.str new file mode 100644 index 00000000..e45c5788 --- /dev/null +++ b/result/errors/truncated-utf16.xml.str @@ -0,0 +1,4 @@ +./test/errors/truncated-utf16.xml:1: parser error : Truncated multi-byte sequence at EOF + + ^ +./test/errors/truncated-utf16.xml : failed to parse diff --git a/test/errors/truncated-utf16.xml b/test/errors/truncated-utf16.xml new file mode 100644 index 0000000000000000000000000000000000000000..b755ddd46560bb193707438032a3b601ef78f195 GIT binary patch literal 11 ScmezW&xRp|L7%~nArSx@$^&cw literal 0 HcmV?d00001