From a6c76a26ca6c6e68dc1735195d1daa5c1ce1fcb5 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Wed, 26 Aug 2009 14:37:00 +0200 Subject: [PATCH] 566012 part 2 fix regression tests and push mode * test/utf16bebom.xml: regression test showed that this test case was broken but previous behaviour would not detect it ! * parser.c: fix 566012 for the push mode of the parser, tricky ! * test/ebcdic_566012.xml result/ebcdic_566012.xml*: add the test to the regression suite --- parser.c | 56 ++++++++++++++++++++++++++++++--- result/ebcdic_566012.xml | 1 + result/ebcdic_566012.xml.rde | 1 + result/ebcdic_566012.xml.rdr | 1 + result/ebcdic_566012.xml.sax | 5 +++ result/ebcdic_566012.xml.sax2 | 5 +++ result/noent/ebcdic_566012.xml | 1 + test/ebcdic_566012.xml | 1 + test/utf16bebom.xml | Bin 344 -> 346 bytes 9 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 result/ebcdic_566012.xml create mode 100644 result/ebcdic_566012.xml.rde create mode 100644 result/ebcdic_566012.xml.rdr create mode 100644 result/ebcdic_566012.xml.sax create mode 100644 result/ebcdic_566012.xml.sax2 create mode 100644 result/noent/ebcdic_566012.xml create mode 100644 test/ebcdic_566012.xml diff --git a/parser.c b/parser.c index 0d856b74..efad2f12 100644 --- a/parser.c +++ b/parser.c @@ -10007,6 +10007,12 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { } xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); } + + /* + * We can grow the input buffer freely at that point + */ + GROW; + SKIP_BLANKS; ctxt->input->standalone = xmlParseSDDecl(ctxt); @@ -11493,6 +11499,7 @@ int xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { int end_in_lf = 0; + int remain = 0; if (ctxt == NULL) return(XML_ERR_INTERNAL_ERROR); @@ -11505,12 +11512,41 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, end_in_lf = 1; size--; } + +xmldecl_done: + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL) && 
(ctxt->instate != XML_PARSER_EOF)) { int base = ctxt->input->base - ctxt->input->buf->buffer->content; int cur = ctxt->input->cur - ctxt->input->base; int res; - + + /* + * Specific handling if we autodetected an encoding, we should not + * push more than the first line ... which depend on the encoding + * And only push the rest once the final encoding was detected + */ + if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { + int len = 45; + + if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UTF-16")) || + (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UTF16"))) + len = 90; + else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UCS-4")) || + (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, + BAD_CAST "UCS4"))) + len = 180; + + if (ctxt->input->buf->rawconsumed < len) + len -= ctxt->input->buf->rawconsumed; + + remain = size - len; + size = len; + } res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); if (res < 0) { ctxt->errNo = XML_PARSER_EOF; @@ -11531,7 +11567,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, if ((in->encoder != NULL) && (in->buffer != NULL) && (in->raw != NULL)) { int nbchars; - + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); if (nbchars < 0) { /* TODO 2.6.0 */ @@ -11542,13 +11578,23 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, } } } - xmlParseTryOrFinish(ctxt, terminate); + if (remain != 0) + xmlParseTryOrFinish(ctxt, 0); + else + xmlParseTryOrFinish(ctxt, terminate); + if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) + return(ctxt->errNo); + + if (remain != 0) { + chunk += size; + size = remain; + remain = 0; + goto xmldecl_done; + } if ((end_in_lf == 1) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); } - if ((ctxt->errNo != XML_ERR_OK) && 
(ctxt->disableSAX == 1)) - return(ctxt->errNo); if (terminate) { /* * Check for termination diff --git a/result/ebcdic_566012.xml b/result/ebcdic_566012.xml new file mode 100644 index 00000000..153add50 --- /dev/null +++ b/result/ebcdic_566012.xml @@ -0,0 +1 @@ +Lo§”“@₯…™’‰–•~ρKπ@…•ƒ–„‰•‡~ΙΒΤ`ρρτρon%L£…’£@££™~JΰZan% \ No newline at end of file diff --git a/result/ebcdic_566012.xml.rde b/result/ebcdic_566012.xml.rde new file mode 100644 index 00000000..efbc18b8 --- /dev/null +++ b/result/ebcdic_566012.xml.rde @@ -0,0 +1 @@ +0 1 test 1 0 diff --git a/result/ebcdic_566012.xml.rdr b/result/ebcdic_566012.xml.rdr new file mode 100644 index 00000000..efbc18b8 --- /dev/null +++ b/result/ebcdic_566012.xml.rdr @@ -0,0 +1 @@ +0 1 test 1 0 diff --git a/result/ebcdic_566012.xml.sax b/result/ebcdic_566012.xml.sax new file mode 100644 index 00000000..7ec6d5a7 --- /dev/null +++ b/result/ebcdic_566012.xml.sax @@ -0,0 +1,5 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(test, attr='Γ„Γ–Γœ') +SAX.endElement(test) +SAX.endDocument() diff --git a/result/ebcdic_566012.xml.sax2 b/result/ebcdic_566012.xml.sax2 new file mode 100644 index 00000000..b8a4ce0e --- /dev/null +++ b/result/ebcdic_566012.xml.sax2 @@ -0,0 +1,5 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElementNs(test, NULL, NULL, 0, 1, 0, attr='ÄÖ...', 6) +SAX.endElementNs(test, NULL, NULL) +SAX.endDocument() diff --git a/result/noent/ebcdic_566012.xml b/result/noent/ebcdic_566012.xml new file mode 100644 index 00000000..153add50 --- /dev/null +++ b/result/noent/ebcdic_566012.xml @@ -0,0 +1 @@ +Lo§”“@₯…™’‰–•~ρKπ@…•ƒ–„‰•‡~ΙΒΤ`ρρτρon%L£…’£@££™~JΰZan% \ No newline at end of file diff --git a/test/ebcdic_566012.xml b/test/ebcdic_566012.xml new file mode 100644 index 00000000..09b4e7b6 --- /dev/null +++ b/test/ebcdic_566012.xml @@ -0,0 +1 @@ +Lo§”“@₯…™’‰–•~ρKπ@…•ƒ–„‰•‡~ΙΒΤ`ρρτρ@on%L£…’£@££™~JΰZ@an% \ No newline at end of file diff --git a/test/utf16bebom.xml b/test/utf16bebom.xml index 
f0c2c2be4305eced4b1824091c28f8246bd32d74..8c402e02c0ed3261e38b73e81838959aa4b76852 100644 GIT binary patch delta 11 Scmcb?bc>1k-+zV;;fw$tl6S